//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/VectorUtils.h"
36 #include "llvm/CodeGen/DAGCombine.h"
37 #include "llvm/CodeGen/ISDOpcodes.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineMemOperand.h"
40 #include "llvm/CodeGen/RuntimeLibcalls.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
78 #define DEBUG_TYPE "dagcombine"
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135 cl::desc("DAG combiner enable load/<replace bytes>/store with "
136 "a narrower store"));
142 const TargetLowering &TLI;
143 const SelectionDAGTargetInfo *STI;
144 CombineLevel Level = BeforeLegalizeTypes;
145 CodeGenOpt::Level OptLevel;
146 bool LegalDAG = false;
147 bool LegalOperations = false;
148 bool LegalTypes = false;
150 bool DisableGenericCombines;
152 /// Worklist of all of the nodes that need to be simplified.
154 /// This must behave as a stack -- new nodes to process are pushed onto the
155 /// back and when processing we pop off of the back.
157 /// The worklist will not contain duplicates but may contain null entries
158 /// due to nodes being deleted from the underlying DAG.
159 SmallVector<SDNode *, 64> Worklist;
161 /// Mapping from an SDNode to its position on the worklist.
163 /// This is used to find and remove nodes from the worklist (by nulling
164 /// them) when they are deleted from the underlying DAG. It relies on
165 /// stable indices of nodes within the worklist.
166 DenseMap<SDNode *, unsigned> WorklistMap;
167 /// This records all nodes attempted to add to the worklist since we
168 /// considered a new worklist entry. As we keep do not add duplicate nodes
169 /// in the worklist, this is different from the tail of the worklist.
170 SmallSetVector<SDNode *, 32> PruningList;
172 /// Set of nodes which have been combined (at least once).
174 /// This is used to allow us to reliably add any operands of a DAG node
175 /// which have not yet been combined to the worklist.
176 SmallPtrSet<SDNode *, 32> CombinedNodes;
178 /// Map from candidate StoreNode to the pair of RootNode and count.
179 /// The count is used to track how many times we have seen the StoreNode
180 /// with the same RootNode bail out in dependence check. If we have seen
181 /// the bail out for the same pair many times over a limit, we won't
182 /// consider the StoreNode with the same RootNode as store merging
184 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
186 // AA - Used for DAG load/store alias analysis.
189 /// When an instruction is simplified, add all users of the instruction to
190 /// the work lists because they might get more simplified now.
191 void AddUsersToWorklist(SDNode *N) {
192 for (SDNode *Node : N->uses())
196 /// Convenient shorthand to add a node and all of its user to the worklist.
197 void AddToWorklistWithUsers(SDNode *N) {
198 AddUsersToWorklist(N);
202 // Prune potentially dangling nodes. This is called after
203 // any visit to a node, but should also be called during a visit after any
204 // failed combine which may have created a DAG node.
205 void clearAddedDanglingWorklistEntries() {
206 // Check any nodes added to the worklist to see if they are prunable.
207 while (!PruningList.empty()) {
208 auto *N = PruningList.pop_back_val();
210 recursivelyDeleteUnusedNodes(N);
214 SDNode *getNextWorklistEntry() {
215 // Before we do any work, remove nodes that are not in use.
216 clearAddedDanglingWorklistEntries();
218 // The Worklist holds the SDNodes in order, but it may contain null
220 while (!N && !Worklist.empty()) {
221 N = Worklist.pop_back_val();
225 bool GoodWorklistEntry = WorklistMap.erase(N);
226 (void)GoodWorklistEntry;
227 assert(GoodWorklistEntry &&
228 "Found a worklist entry without a corresponding map entry!");
233 /// Call the node-specific routine that folds each particular type of node.
234 SDValue visit(SDNode *N);
237 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238 : DAG(D), TLI(D.getTargetLoweringInfo()),
239 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
240 ForCodeSize = DAG.shouldOptForSize();
241 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
243 MaximumLegalStoreInBits = 0;
244 // We use the minimum store size here, since that's all we can guarantee
245 // for the scalable vector types.
246 for (MVT VT : MVT::all_valuetypes())
247 if (EVT(VT).isSimple() && VT != MVT::Other &&
248 TLI.isTypeLegal(EVT(VT)) &&
249 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
250 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
253 void ConsiderForPruning(SDNode *N) {
254 // Mark this for potential pruning.
255 PruningList.insert(N);
258 /// Add to the worklist making sure its instance is at the back (next to be
260 void AddToWorklist(SDNode *N) {
261 assert(N->getOpcode() != ISD::DELETED_NODE &&
262 "Deleted Node added to Worklist");
264 // Skip handle nodes as they can't usefully be combined and confuse the
265 // zero-use deletion strategy.
266 if (N->getOpcode() == ISD::HANDLENODE)
269 ConsiderForPruning(N);
271 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
272 Worklist.push_back(N);
275 /// Remove all instances of N from the worklist.
276 void removeFromWorklist(SDNode *N) {
277 CombinedNodes.erase(N);
278 PruningList.remove(N);
279 StoreRootCountMap.erase(N);
281 auto It = WorklistMap.find(N);
282 if (It == WorklistMap.end())
283 return; // Not in the worklist.
285 // Null out the entry rather than erasing it to avoid a linear operation.
286 Worklist[It->second] = nullptr;
287 WorklistMap.erase(It);
290 void deleteAndRecombine(SDNode *N);
291 bool recursivelyDeleteUnusedNodes(SDNode *N);
293 /// Replaces all uses of the results of one DAG node with new values.
294 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
297 /// Replaces all uses of the results of one DAG node with new values.
298 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
299 return CombineTo(N, &Res, 1, AddTo);
302 /// Replaces all uses of the results of one DAG node with new values.
303 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
305 SDValue To[] = { Res0, Res1 };
306 return CombineTo(N, To, 2, AddTo);
309 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
312 unsigned MaximumLegalStoreInBits;
314 /// Check the specified integer node value to see if it can be simplified or
315 /// if things it uses can be simplified by bit propagation.
316 /// If so, return true.
317 bool SimplifyDemandedBits(SDValue Op) {
318 unsigned BitWidth = Op.getScalarValueSizeInBits();
319 APInt DemandedBits = APInt::getAllOnes(BitWidth);
320 return SimplifyDemandedBits(Op, DemandedBits);
323 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
324 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
326 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 AddToWorklist(Op.getNode());
332 CommitTargetLoweringOpt(TLO);
336 /// Check the specified vector node value to see if it can be simplified or
337 /// if things it uses can be simplified as it only uses some of the
338 /// elements. If so, return true.
339 bool SimplifyDemandedVectorElts(SDValue Op) {
340 // TODO: For now just pretend it cannot be simplified.
341 if (Op.getValueType().isScalableVector())
344 unsigned NumElts = Op.getValueType().getVectorNumElements();
345 APInt DemandedElts = APInt::getAllOnes(NumElts);
346 return SimplifyDemandedVectorElts(Op, DemandedElts);
349 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
350 const APInt &DemandedElts,
351 bool AssumeSingleUse = false);
352 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
353 bool AssumeSingleUse = false);
355 bool CombineToPreIndexedLoadStore(SDNode *N);
356 bool CombineToPostIndexedLoadStore(SDNode *N);
357 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
358 bool SliceUpLoad(SDNode *N);
360 // Scalars have size 0 to distinguish from singleton vectors.
361 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
362 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
363 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
365 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
368 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
369 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
370 /// \param EltNo index of the vector element to load.
371 /// \param OriginalLoad load that EVE came from to be replaced.
372 /// \returns EVE on success SDValue() on failure.
373 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
375 LoadSDNode *OriginalLoad);
376 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
377 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
378 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
379 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
380 SDValue PromoteIntBinOp(SDValue Op);
381 SDValue PromoteIntShiftOp(SDValue Op);
382 SDValue PromoteExtend(SDValue Op);
383 bool PromoteLoad(SDValue Op);
385 /// Call the node-specific routine that knows how to fold each
386 /// particular type of node. If that doesn't do anything, try the
387 /// target-specific DAG combines.
388 SDValue combine(SDNode *N);
390 // Visitation implementation - Implement dag node combining for different
391 // node types. The semantics are as follows:
393 // SDValue.getNode() == 0 - No change was made
394 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
395 // otherwise - N should be replaced by the returned Operand.
397 SDValue visitTokenFactor(SDNode *N);
398 SDValue visitMERGE_VALUES(SDNode *N);
399 SDValue visitADD(SDNode *N);
400 SDValue visitADDLike(SDNode *N);
401 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
402 SDValue visitSUB(SDNode *N);
403 SDValue visitADDSAT(SDNode *N);
404 SDValue visitSUBSAT(SDNode *N);
405 SDValue visitADDC(SDNode *N);
406 SDValue visitADDO(SDNode *N);
407 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
408 SDValue visitSUBC(SDNode *N);
409 SDValue visitSUBO(SDNode *N);
410 SDValue visitADDE(SDNode *N);
411 SDValue visitADDCARRY(SDNode *N);
412 SDValue visitSADDO_CARRY(SDNode *N);
413 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414 SDValue visitSUBE(SDNode *N);
415 SDValue visitSUBCARRY(SDNode *N);
416 SDValue visitSSUBO_CARRY(SDNode *N);
417 SDValue visitMUL(SDNode *N);
418 SDValue visitMULFIX(SDNode *N);
419 SDValue useDivRem(SDNode *N);
420 SDValue visitSDIV(SDNode *N);
421 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
422 SDValue visitUDIV(SDNode *N);
423 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
424 SDValue visitREM(SDNode *N);
425 SDValue visitMULHU(SDNode *N);
426 SDValue visitMULHS(SDNode *N);
427 SDValue visitAVG(SDNode *N);
428 SDValue visitSMUL_LOHI(SDNode *N);
429 SDValue visitUMUL_LOHI(SDNode *N);
430 SDValue visitMULO(SDNode *N);
431 SDValue visitIMINMAX(SDNode *N);
432 SDValue visitAND(SDNode *N);
433 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
434 SDValue visitOR(SDNode *N);
435 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitXOR(SDNode *N);
437 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
438 SDValue visitSHL(SDNode *N);
439 SDValue visitSRA(SDNode *N);
440 SDValue visitSRL(SDNode *N);
441 SDValue visitFunnelShift(SDNode *N);
442 SDValue visitSHLSAT(SDNode *N);
443 SDValue visitRotate(SDNode *N);
444 SDValue visitABS(SDNode *N);
445 SDValue visitBSWAP(SDNode *N);
446 SDValue visitBITREVERSE(SDNode *N);
447 SDValue visitCTLZ(SDNode *N);
448 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
449 SDValue visitCTTZ(SDNode *N);
450 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
451 SDValue visitCTPOP(SDNode *N);
452 SDValue visitSELECT(SDNode *N);
453 SDValue visitVSELECT(SDNode *N);
454 SDValue visitSELECT_CC(SDNode *N);
455 SDValue visitSETCC(SDNode *N);
456 SDValue visitSETCCCARRY(SDNode *N);
457 SDValue visitSIGN_EXTEND(SDNode *N);
458 SDValue visitZERO_EXTEND(SDNode *N);
459 SDValue visitANY_EXTEND(SDNode *N);
460 SDValue visitAssertExt(SDNode *N);
461 SDValue visitAssertAlign(SDNode *N);
462 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
463 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
464 SDValue visitTRUNCATE(SDNode *N);
465 SDValue visitBITCAST(SDNode *N);
466 SDValue visitFREEZE(SDNode *N);
467 SDValue visitBUILD_PAIR(SDNode *N);
468 SDValue visitFADD(SDNode *N);
469 SDValue visitSTRICT_FADD(SDNode *N);
470 SDValue visitFSUB(SDNode *N);
471 SDValue visitFMUL(SDNode *N);
472 SDValue visitFMA(SDNode *N);
473 SDValue visitFDIV(SDNode *N);
474 SDValue visitFREM(SDNode *N);
475 SDValue visitFSQRT(SDNode *N);
476 SDValue visitFCOPYSIGN(SDNode *N);
477 SDValue visitFPOW(SDNode *N);
478 SDValue visitSINT_TO_FP(SDNode *N);
479 SDValue visitUINT_TO_FP(SDNode *N);
480 SDValue visitFP_TO_SINT(SDNode *N);
481 SDValue visitFP_TO_UINT(SDNode *N);
482 SDValue visitFP_ROUND(SDNode *N);
483 SDValue visitFP_EXTEND(SDNode *N);
484 SDValue visitFNEG(SDNode *N);
485 SDValue visitFABS(SDNode *N);
486 SDValue visitFCEIL(SDNode *N);
487 SDValue visitFTRUNC(SDNode *N);
488 SDValue visitFFLOOR(SDNode *N);
489 SDValue visitFMinMax(SDNode *N);
490 SDValue visitBRCOND(SDNode *N);
491 SDValue visitBR_CC(SDNode *N);
492 SDValue visitLOAD(SDNode *N);
494 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
497 SDValue visitSTORE(SDNode *N);
498 SDValue visitLIFETIME_END(SDNode *N);
499 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501 SDValue visitBUILD_VECTOR(SDNode *N);
502 SDValue visitCONCAT_VECTORS(SDNode *N);
503 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504 SDValue visitVECTOR_SHUFFLE(SDNode *N);
505 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506 SDValue visitINSERT_SUBVECTOR(SDNode *N);
507 SDValue visitMLOAD(SDNode *N);
508 SDValue visitMSTORE(SDNode *N);
509 SDValue visitMGATHER(SDNode *N);
510 SDValue visitMSCATTER(SDNode *N);
511 SDValue visitFP_TO_FP16(SDNode *N);
512 SDValue visitFP16_TO_FP(SDNode *N);
513 SDValue visitFP_TO_BF16(SDNode *N);
514 SDValue visitVECREDUCE(SDNode *N);
515 SDValue visitVPOp(SDNode *N);
517 SDValue visitFADDForFMACombine(SDNode *N);
518 SDValue visitFSUBForFMACombine(SDNode *N);
519 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
521 SDValue XformToShuffleWithZero(SDNode *N);
522 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
527 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
529 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530 SDValue N1, SDNodeFlags Flags);
532 SDValue visitShiftByConstant(SDNode *N);
534 SDValue foldSelectOfConstants(SDNode *N);
535 SDValue foldVSelectOfConstants(SDNode *N);
536 SDValue foldBinOpIntoSelect(SDNode *BO);
537 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541 SDValue N2, SDValue N3, ISD::CondCode CC,
542 bool NotExtCompare = false);
543 SDValue convertSelectOfFPConstantsToLoadOffset(
544 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
546 SDValue foldSignChangeInBitcast(SDNode *N);
547 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548 SDValue N2, SDValue N3, ISD::CondCode CC);
549 SDValue foldSelectOfBinops(SDNode *N);
550 SDValue foldSextSetcc(SDNode *N);
551 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
553 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554 SDValue unfoldMaskedMerge(SDNode *N);
555 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557 const SDLoc &DL, bool foldBooleans);
558 SDValue rebuildSetCC(SDValue N);
560 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561 SDValue &CC, bool MatchStrict = false) const;
562 bool isOneUseSetCC(SDValue N) const;
564 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
566 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567 SDValue CombineExtLoad(SDNode *N);
568 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569 SDValue combineRepeatedFPDivisors(SDNode *N);
570 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572 SDValue BuildSDIV(SDNode *N);
573 SDValue BuildSDIVPow2(SDNode *N);
574 SDValue BuildUDIV(SDNode *N);
575 SDValue BuildSREMPow2(SDNode *N);
576 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
577 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
578 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
579 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
580 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
581 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
582 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
583 SDNodeFlags Flags, bool Reciprocal);
584 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
585 SDNodeFlags Flags, bool Reciprocal);
586 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
587 bool DemandHighBits = true);
588 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
589 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
590 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
591 unsigned PosOpcode, unsigned NegOpcode,
593 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
594 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
595 unsigned PosOpcode, unsigned NegOpcode,
597 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
598 SDValue MatchLoadCombine(SDNode *N);
599 SDValue mergeTruncStores(StoreSDNode *N);
600 SDValue reduceLoadWidth(SDNode *N);
601 SDValue ReduceLoadOpStoreWidth(SDNode *N);
602 SDValue splitMergedValStore(StoreSDNode *ST);
603 SDValue TransformFPLoadStorePair(SDNode *N);
604 SDValue convertBuildVecZextToZext(SDNode *N);
605 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
606 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
607 SDValue reduceBuildVecToShuffle(SDNode *N);
608 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
609 ArrayRef<int> VectorMask, SDValue VecIn1,
610 SDValue VecIn2, unsigned LeftIdx,
612 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
614 /// Walk up chain skipping non-aliasing memory nodes,
615 /// looking for aliasing nodes and adding them to the Aliases vector.
616 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
617 SmallVectorImpl<SDValue> &Aliases);
619 /// Return true if there is any possibility that the two addresses overlap.
620 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
622 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
623 /// chain (aliasing node.)
624 SDValue FindBetterChain(SDNode *N, SDValue Chain);
626 /// Try to replace a store and any possibly adjacent stores on
627 /// consecutive chains with better chains. Return true only if St is
630 /// Notice that other chains may still be replaced even if the function
632 bool findBetterNeighborChains(StoreSDNode *St);
634 // Helper for findBetterNeighborChains. Walk up store chain add additional
635 // chained stores that do not overlap and can be parallelized.
636 bool parallelizeChainedStores(StoreSDNode *St);
638 /// Holds a pointer to an LSBaseSDNode as well as information on where it
639 /// is located in a sequence of memory operations connected by a chain.
641 // Ptr to the mem node.
642 LSBaseSDNode *MemNode;
644 // Offset from the base ptr.
645 int64_t OffsetFromBase;
647 MemOpLink(LSBaseSDNode *N, int64_t Offset)
648 : MemNode(N), OffsetFromBase(Offset) {}
651 // Classify the origin of a stored value.
652 enum class StoreSource { Unknown, Constant, Extract, Load };
653 StoreSource getStoreSource(SDValue StoreVal) {
654 switch (StoreVal.getOpcode()) {
656 case ISD::ConstantFP:
657 return StoreSource::Constant;
658 case ISD::EXTRACT_VECTOR_ELT:
659 case ISD::EXTRACT_SUBVECTOR:
660 return StoreSource::Extract;
662 return StoreSource::Load;
664 return StoreSource::Unknown;
668 /// This is a helper function for visitMUL to check the profitability
669 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
670 /// MulNode is the original multiply, AddNode is (add x, c1),
671 /// and ConstNode is c2.
672 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
675 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
676 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
677 /// the type of the loaded value to be extended.
678 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
679 EVT LoadResultTy, EVT &ExtVT);
681 /// Helper function to calculate whether the given Load/Store can have its
682 /// width reduced to ExtVT.
683 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
684 EVT &MemVT, unsigned ShAmt = 0);
686 /// Used by BackwardsPropagateMask to find suitable loads.
687 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
688 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
689 ConstantSDNode *Mask, SDNode *&NodeToMask);
690 /// Attempt to propagate a given AND node back to load leaves so that they
691 /// can be combined into narrow loads.
692 bool BackwardsPropagateMask(SDNode *N);
694 /// Helper function for mergeConsecutiveStores which merges the component
696 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
699 /// This is a helper function for mergeConsecutiveStores. When the source
700 /// elements of the consecutive stores are all constants or all extracted
701 /// vector elements, try to merge them into one larger store introducing
702 /// bitcasts if necessary. \return True if a merged store was created.
703 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
704 EVT MemVT, unsigned NumStores,
705 bool IsConstantSrc, bool UseVector,
708 /// This is a helper function for mergeConsecutiveStores. Stores that
709 /// potentially may be merged with St are placed in StoreNodes. RootNode is
710 /// a chain predecessor to all store candidates.
711 void getStoreMergeCandidates(StoreSDNode *St,
712 SmallVectorImpl<MemOpLink> &StoreNodes,
715 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
716 /// have indirect dependency through their operands. RootNode is the
717 /// predecessor to all stores calculated by getStoreMergeCandidates and is
718 /// used to prune the dependency check. \return True if safe to merge.
719 bool checkMergeStoreCandidatesForDependencies(
720 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
723 /// This is a helper function for mergeConsecutiveStores. Given a list of
724 /// store candidates, find the first N that are consecutive in memory.
725 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
726 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
727 int64_t ElementSizeBytes) const;
729 /// This is a helper function for mergeConsecutiveStores. It is used for
730 /// store chains that are composed entirely of constant values.
731 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
732 unsigned NumConsecutiveStores,
733 EVT MemVT, SDNode *Root, bool AllowVectors);
735 /// This is a helper function for mergeConsecutiveStores. It is used for
736 /// store chains that are composed entirely of extracted vector elements.
737 /// When extracting multiple vector elements, try to store them in one
738 /// vector store rather than a sequence of scalar stores.
739 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
740 unsigned NumConsecutiveStores, EVT MemVT,
743 /// This is a helper function for mergeConsecutiveStores. It is used for
744 /// store chains that are composed entirely of loaded values.
745 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
746 unsigned NumConsecutiveStores, EVT MemVT,
747 SDNode *Root, bool AllowVectors,
748 bool IsNonTemporalStore, bool IsNonTemporalLoad);
750 /// Merge consecutive store operations into a wide store.
751 /// This optimization uses wide integers or vectors when possible.
752 /// \return true if stores were merged.
753 bool mergeConsecutiveStores(StoreSDNode *St);
755 /// Try to transform a truncation where C is a constant:
756 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
758 /// \p N needs to be a truncation and its first operand an AND. Other
759 /// requirements are checked by the function (e.g. that trunc is
760 /// single-use) and if missed an empty SDValue is returned.
761 SDValue distributeTruncateThroughAnd(SDNode *N);
763 /// Helper function to determine whether the target supports operation
764 /// given by \p Opcode for type \p VT, that is, whether the operation
765 /// is legal or custom before legalizing operations, and whether is
766 /// legal (but not custom) after legalization.
767 bool hasOperation(unsigned Opcode, EVT VT) {
768 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
772 /// Runs the dag combiner on all nodes in the work list
773 void Run(CombineLevel AtLevel);
775 SelectionDAG &getDAG() const { return DAG; }
777 /// Returns a type large enough to hold any valid shift amount - before type
778 /// legalization these can be huge.
779 EVT getShiftAmountTy(EVT LHSTy) {
780 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
781 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
784 /// This method returns true if we are running before type legalization or
785 /// if the specified VT is legal.
786 bool isTypeLegal(const EVT &VT) {
787 if (!LegalTypes) return true;
788 return TLI.isTypeLegal(VT);
791 /// Convenience wrapper around TargetLowering::getSetCCResultType
792 EVT getSetCCResultType(EVT VT) const {
793 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
796 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
797 SDValue OrigLoad, SDValue ExtLoad,
798 ISD::NodeType ExtType);
801 /// This class is a DAGUpdateListener that removes any deleted
802 /// nodes from the worklist.
803 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
807 explicit WorklistRemover(DAGCombiner &dc)
808 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
810 void NodeDeleted(SDNode *N, SDNode *E) override {
811 DC.removeFromWorklist(N);
815 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
819 explicit WorklistInserter(DAGCombiner &dc)
820 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
822 // FIXME: Ideally we could add N to the worklist, but this causes exponential
823 // compile time costs in large DAGs, e.g. Halide.
824 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
827 } // end anonymous namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
833 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
834 ((DAGCombiner*)DC)->AddToWorklist(N);
837 SDValue TargetLowering::DAGCombinerInfo::
838 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
839 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
842 SDValue TargetLowering::DAGCombinerInfo::
843 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
844 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
847 SDValue TargetLowering::DAGCombinerInfo::
848 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
849 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
852 bool TargetLowering::DAGCombinerInfo::
853 recursivelyDeleteUnusedNodes(SDNode *N) {
854 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
857 void TargetLowering::DAGCombinerInfo::
858 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
859 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
862 //===----------------------------------------------------------------------===//
864 //===----------------------------------------------------------------------===//
// Remove N from the worklist and queue its operands for revisiting, since
// deleting N may make them dead or newly simplifiable.
866 void DAGCombiner::deleteAndRecombine(SDNode *N) {
867 removeFromWorklist(N);
869 // If the operands of this node are only used by the node, they will now be
870 // dead. Make sure to re-visit them and recursively delete dead nodes.
871 for (const SDValue &Op : N->ops())
872 // For an operand generating multiple values, one of the values may
873 // become dead allowing further simplification (e.g. split index
874 // arithmetic from an indexed load).
875 if (Op->hasOneUse() || Op->getNumValues() > 1)
876 AddToWorklist(Op.getNode())
881 // APInts must be the same size for most operations, this helper
882 // function zero extends the shorter of the pair so that they match.
883 // We provide an Offset so that we can create bitwidths that won't overflow.
884 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
// Target width: the wider of the two inputs, plus Offset spare bits so a
// caller can e.g. add/shift afterwards without wrapping.
885 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
886 LHS = LHS.zext(Bits);
887 RHS = RHS.zext(Bits);
890 // Return true if this node is a setcc, or is a select_cc
891 // that selects between the target values used for true and false, making it
892 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
893 // the appropriate nodes based on the type of node we are checking. This
894 // simplifies life a bit for the callers.
895 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
896 SDValue &CC, bool MatchStrict) const {
897 if (N.getOpcode() == ISD::SETCC) {
898 LHS = N.getOperand(0);
899 RHS = N.getOperand(1);
900 CC = N.getOperand(2);
// Strict FP setcc variants carry a chain as operand 0, so the comparison
// operands are shifted by one relative to plain SETCC.
905 (N.getOpcode() == ISD::STRICT_FSETCC ||
906 N.getOpcode() == ISD::STRICT_FSETCCS)) {
907 LHS = N.getOperand(1);
908 RHS = N.getOperand(2);
909 CC = N.getOperand(3);
// A select_cc only acts like a setcc when its selected values are exactly
// the target's canonical true/false constants.
913 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
914 !TLI.isConstFalseVal(N.getOperand(3)))
// With undefined boolean contents we cannot rely on the true/false values,
// so decline the match.
917 if (TLI.getBooleanContents(N.getValueType()) ==
918 TargetLowering::UndefinedBooleanContent)
921 LHS = N.getOperand(0);
922 RHS = N.getOperand(1);
923 CC = N.getOperand(4);
927 /// Return true if this is a SetCC-equivalent operation with only one use.
928 /// If this is true, it allows the users to invert the operation for free when
929 /// it is profitable to do so.
930 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
// The extracted operands are discarded; we only care whether N matches.
932 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
// Returns true if N is a constant splat whose value is exactly the
// all-ones mask for the given scalar type (e.g. 0xFFFF for i16).
937 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938 if (!ScalarTy.isSimple())
941 uint64_t MaskForTy = 0ULL;
942 switch (ScalarTy.getSimpleVT().SimpleTy) {
947 MaskForTy = 0xFFFFULL;
950 MaskForTy = 0xFFFFFFFFULL;
958 if (ISD::isConstantSplatVector(N, Val))
959 return Val.getLimitedValue() == MaskForTy;
964 // Determines if it is a constant integer or a splat/build vector of constant
965 // integers (and undefs).
966 // Do not permit build vector implicit truncation.
967 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969 return !(Const->isOpaque() && NoOpaques)
970 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
// Every constant element must match the vector's scalar width exactly
// (no implicit truncation) and respect the opaqueness restriction.
972 unsigned BitWidth = N.getScalarValueSizeInBits();
973 for (const SDValue &Op : N->op_values()) {
976 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978 (Const->isOpaque() && NoOpaques))
984 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undefs; accepts either all-integer or all-FP constant elements.
986 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987 if (V.getOpcode() != ISD::BUILD_VECTOR)
989 return isConstantOrConstantVector(V, NoOpaques) ||
990 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
993 // Determine if this an indexed load with an opaque target constant index.
// Splitting is allowed (behind the MaySplitLoadIndex option) unless the
// index operand is an opaque TargetConstant, which must not be touched.
994 static bool canSplitIdx(LoadSDNode *LD) {
995 return MaySplitLoadIndex &&
996 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
// Returns true when reassociating (add (add x, c1), c2) would destroy an
// addressing-mode pattern that a memory user of N depends on.
1000 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1005 // Currently this only tries to ensure we don't undo the GEP splits done by
1006 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1007 // we check if the following transformation would be problematic:
1008 // (load/store (add, (add, x, offset1), offset2)) ->
1009 // (load/store (add, x, offset1+offset2)).
1011 // (load/store (add, (add, x, y), offset2)) ->
1012 // (load/store (add, (add, x, offset2), y)).
1014 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1017 auto *C2 = dyn_cast<ConstantSDNode>(N1)
// Give up on offsets that do not fit in 64 bits; addressing-mode queries
// below take an int64_t BaseOffs.
1021 const APInt &C2APIntVal = C2->getAPIntValue();
1022 if (C2APIntVal.getSignificantBits() > 64)
1025 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1029 const APInt &C1APIntVal = C1->getAPIntValue();
1030 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1031 if (CombinedValueIntVal.getSignificantBits() > 64)
1033 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1035 for (SDNode *Node : N->uses()) {
1036 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1037 // Is x[offset2] already not a legal addressing mode? If so then
1038 // reassociating the constants breaks nothing (we test offset2 because
1039 // that's the one we hope to fold into the load or store).
1040 TargetLoweringBase::AddrMode AM;
1041 AM.HasBaseReg = true;
1042 AM.BaseOffs = C2APIntVal.getSExtValue();
1043 EVT VT = LoadStore->getMemoryVT();
1044 unsigned AS = LoadStore->getAddressSpace();
1045 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1046 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049 // Would x[offset1+offset2] still be a legal addressing mode?
1050 AM.BaseOffs = CombinedValue;
1051 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
// Non-constant inner operand: a foldable global address plus a legal
// offset addressing mode means the reassociation would break the pattern.
1056 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1057 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1060 for (SDNode *Node : N->uses()) {
1061 auto *LoadStore = dyn_cast<MemSDNode>(Node)
1065 // Is x[offset2] a legal addressing mode? If so then
1066 // reassociating the constants breaks address pattern
1067 TargetLoweringBase::AddrMode AM;
1068 AM.HasBaseReg = true;
1069 AM.BaseOffs = C2APIntVal.getSExtValue();
1070 EVT VT = LoadStore->getMemoryVT();
1071 unsigned AS = LoadStore->getAddressSpace();
1072 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1073 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1082 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1083 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1084 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1085 SDValue N0, SDValue N1) {
1086 EVT VT = N0.getValueType();
1088 if (N0.getOpcode() != Opc)
1091 SDValue N00 = N0.getOperand(0);
1092 SDValue N01 = N0.getOperand(1);
1094 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1095 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1096 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1097 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1098 return DAG.getNode(Opc, DL, VT, N00, OpNode)
1101 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1102 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1103 // iff (op x, c1) has one use
1104 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1105 return DAG.getNode(Opc, DL, VT, OpNode, N01)
1109 // Check for repeated operand logic simplifications.
1110 if (Opc == ISD::AND || Opc == ISD::OR) {
1111 // (N00 & N01) & N00 --> N00 & N01
1112 // (N00 & N01) & N01 --> N00 & N01
1113 // (N00 | N01) | N00 --> N00 | N01
1114 // (N00 | N01) | N01 --> N00 | N01
1115 if (N1 == N00 || N1 == N01)
1118 if (Opc == ISD::XOR) {
1119 // (N00 ^ N01) ^ N00 --> N01
1122 // (N00 ^ N01) ^ N01 --> N00
1127 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1129 // Reassociate if (op N00, N1) already exist
1130 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1131 // if Op (Op N00, N1), N01 already exist
1132 // we need to stop reassociating to avoid an infinite loop
1133 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1134 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01)
1139 // Reassociate if (op N01, N1) already exist
1140 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1141 // if Op (Op N01, N1), N00 already exist
1142 // we need to stop reassociating to avoid an infinite loop
1143 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1144 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00)
1152 // Try to reassociate commutative binops.
// Tries both orderings (N0, N1) and (N1, N0) via reassociateOpsCommutative.
1153 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1154 SDValue N1, SDNodeFlags Flags) {
1155 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1157 // Floating-point reassociation is not allowed without loose FP math.
1158 if (N0.getValueType().isFloatingPoint() ||
1159 N1.getValueType().isFloatingPoint())
1160 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1163 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1165 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
// Replace all NumTo results of N with the values in To, update the
// worklist, delete N if it became dead, and return SDValue(N, 0) so the
// caller can signal that a combine happened.
1170 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1172 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1174 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1176 dbgs() << " and " << NumTo - 1 << " other values\n");
1177 for (unsigned i = 0, e = NumTo; i != e; ++i)
1178 assert((!To[i].getNode() ||
1179 N->getValueType(i) == To[i].getValueType()) &&
1180 "Cannot combine value to value of different type!");
1182 WorklistRemover DeadNodes(*this);
1183 DAG.ReplaceAllUsesWith(N, To);
1185 // Push the new nodes and any users onto the worklist
1186 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1187 if (To[i].getNode())
1188 AddToWorklistWithUsers(To[i].getNode())
1192 // Finally, if the node is now dead, remove it from the graph. The node
1193 // may not be dead if the replacement process recursively simplified to
1194 // something else needing this node.
1196 deleteAndRecombine(N);
1197 return SDValue(N, 0);
// Apply a replacement computed by the TargetLowering bit/elt simplifiers:
// RAUW TLO.Old with TLO.New, then clean up the worklist and dead nodes.
1201 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1202 // Replace the old value with the new one.
1204 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1205 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1207 // Replace all uses.
1208 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1210 // Push the new node and any (possibly new) users onto the worklist.
1211 AddToWorklistWithUsers(TLO.New.getNode());
1213 // Finally, if the node is now dead, remove it from the graph.
1214 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1217 /// Check the specified integer node value to see if it can be simplified or if
1218 /// things it uses can be simplified by bit propagation. If so, return true.
1219 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1220 const APInt &DemandedElts,
1221 bool AssumeSingleUse) {
1222 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
// Delegate to TargetLowering; depth starts at 0.
1224 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1228 // Revisit the node.
1229 AddToWorklist(Op.getNode());
// Commit the Old->New replacement recorded in TLO.
1231 CommitTargetLoweringOpt(TLO);
1235 /// Check the specified vector node value to see if it can be simplified or
1236 /// if things it uses can be simplified as it only uses some of the elements.
1237 /// If so, return true.
1238 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1239 const APInt &DemandedElts,
1240 bool AssumeSingleUse) {
1241 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1242 APInt KnownUndef, KnownZero;
// Delegate to TargetLowering; depth starts at 0.
1243 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1244 TLO, 0, AssumeSingleUse))
1247 // Revisit the node.
1248 AddToWorklist(Op.getNode());
// Commit the Old->New replacement recorded in TLO.
1250 CommitTargetLoweringOpt(TLO);
// Replace Load's value with a truncation of the wider ExtLoad and thread
// ExtLoad's chain through to Load's chain users, then delete Load if dead.
1254 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1256 EVT VT = Load->getValueType(0);
1257 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1259 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1260 Trunc.dump(&DAG); dbgs() << '\n');
// Value result (0) gets the truncation; chain result (1) gets ExtLoad's chain.
1262 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1263 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1265 AddToWorklist(Trunc.getNode());
1266 recursivelyDeleteUnusedNodes(Load);
// Produce a PVT-typed version of Op for integer-promotion combines.
// Sets \p Replace when the caller must later replace the original node
// (e.g. a load that was widened in place).
1269 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1272 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1273 LoadSDNode *LD = cast<LoadSDNode>(Op);
1274 EVT MemVT = LD->getMemoryVT();
// A plain (non-extending) load is re-emitted as an EXTLOAD to the wider
// type; extending loads keep their existing extension kind.
1275 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1276 : LD->getExtensionType();
1278 return DAG.getExtLoad(ExtType, DL, PVT,
1279 LD->getChain(), LD->getBasePtr(),
1280 MemVT, LD->getMemOperand())
1283 unsigned Opc = Op.getOpcode();
1286 case ISD::AssertSext:
1287 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1288 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1))
1290 case ISD::AssertZext:
1291 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1292 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1))
1294 case ISD::Constant: {
1296 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1297 return DAG.getNode(ExtOpc, DL, PVT, Op)
// Default: any-extend, but only if the target supports it at this type.
1301 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1303 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
// Promote Op to PVT and make its value sign-correct by wrapping it in a
// SIGN_EXTEND_INREG of the original (narrower) type.
1306 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1307 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1309 EVT OldVT = Op.getValueType();
1311 bool Replace = false;
1312 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1313 if (!NewOp.getNode())
1315 AddToWorklist(NewOp.getNode());
1318 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1319 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1320 DAG.getValueType(OldVT));
// Promote Op to PVT and clear the newly introduced high bits by
// zero-extending "in register" from the original (narrower) type.
1323 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1324 EVT OldVT = Op.getValueType();
1326 bool Replace = false;
1327 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1328 if (!NewOp.getNode())
1330 AddToWorklist(NewOp.getNode());
1333 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1334 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1337 /// Promote the specified integer binary operation if the target indicates it is
1338 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1339 /// i32 since i16 instructions are longer.
1340 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1341 if (!LegalOperations)
// Only scalar integer operations are candidates for promotion.
1344 EVT VT = Op.getValueType();
1345 if (VT.isVector() || !VT.isInteger())
1348 // If operation type is 'undesirable', e.g. i16 on x86, consider
1350 unsigned Opc = Op.getOpcode();
1351 if (TLI.isTypeDesirableForOp(Opc, VT))
1355 // Consult target whether it is a good idea to promote this operation and
1356 // what's the right type to promote it to.
1357 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1358 assert(PVT != VT && "Don't know what type to promote to!");
1360 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1362 bool Replace0 = false;
1363 SDValue N0 = Op.getOperand(0);
1364 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1366 bool Replace1 = false;
1367 SDValue N1 = Op.getOperand(1);
1368 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
// Perform the operation at the wider type, then truncate back to VT.
1372 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1374 // We are always replacing N0/N1's use in N and only need additional
1375 // replacements if there are additional uses.
1376 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1377 // (SDValue) here because the node may reference multiple values
1378 // (for example, the chain value of a load node).
1379 Replace0 &= !N0->hasOneUse();
1380 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1382 // Combine Op here so it is preserved past replacements.
1383 CombineTo(Op.getNode(), RV);
1385 // If operands have a use ordering, make sure we deal with
1386 // predecessor first.
1387 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1389 std::swap(NN0, NN1)
1393 AddToWorklist(NN0.getNode());
1394 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode())
1397 AddToWorklist(NN1.getNode());
1398 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode())
1405 /// Promote the specified integer shift operation if the target indicates it is
1406 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1407 /// i32 since i16 instructions are longer.
1408 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1409 if (!LegalOperations)
// Only scalar integer shifts are candidates for promotion.
1412 EVT VT = Op.getValueType();
1413 if (VT.isVector() || !VT.isInteger())
1416 // If operation type is 'undesirable', e.g. i16 on x86, consider
1418 unsigned Opc = Op.getOpcode();
1419 if (TLI.isTypeDesirableForOp(Opc, VT))
1423 // Consult target whether it is a good idea to promote this operation and
1424 // what's the right type to promote it to.
1425 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1426 assert(PVT != VT && "Don't know what type to promote to!");
1428 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1430 bool Replace = false;
1431 SDValue N0 = Op.getOperand(0);
// Arithmetic right shift needs the sign bits correct; logical right shift
// needs the high bits zero; other shifts can take any extension.
1432 if (Opc == ISD::SRA)
1433 N0 = SExtPromoteOperand(N0, PVT);
1434 else if (Opc == ISD::SRL)
1435 N0 = ZExtPromoteOperand(N0, PVT);
1437 N0 = PromoteOperand(N0, PVT, Replace)
// The shift amount operand keeps its type; only the value is widened.
1443 SDValue N1 = Op.getOperand(1);
1445 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1448 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode())
1450 // Deal with Op being deleted.
1451 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
// Promote an extension node to a wider desirable type when the target asks
// for it; an extend-of-extend collapses to a single extend of the source.
1457 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1458 if (!LegalOperations)
// Only scalar integer extensions are candidates for promotion.
1461 EVT VT = Op.getValueType();
1462 if (VT.isVector() || !VT.isInteger())
1465 // If operation type is 'undesirable', e.g. i16 on x86, consider
1467 unsigned Opc = Op.getOpcode();
1468 if (TLI.isTypeDesirableForOp(Opc, VT))
1472 // Consult target whether it is a good idea to promote this operation and
1473 // what's the right type to promote it to.
1474 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1475 assert(PVT != VT && "Don't know what type to promote to!");
1476 // fold (aext (aext x)) -> (aext x)
1477 // fold (aext (zext x)) -> (zext x)
1478 // fold (aext (sext x)) -> (sext x)
1479 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1480 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
// Widen an unindexed load to a target-preferred type (as an ext-load) and
// replace the original with a truncate of the wider load. Returns true if
// a replacement was made.
1485 bool DAGCombiner::PromoteLoad(SDValue Op) {
1486 if (!LegalOperations)
1489 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
// Only scalar integer loads are candidates for promotion.
1492 EVT VT = Op.getValueType();
1493 if (VT.isVector() || !VT.isInteger())
1496 // If operation type is 'undesirable', e.g. i16 on x86, consider
1498 unsigned Opc = Op.getOpcode();
1499 if (TLI.isTypeDesirableForOp(Opc, VT))
1503 // Consult target whether it is a good idea to promote this operation and
1504 // what's the right type to promote it to.
1505 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1506 assert(PVT != VT && "Don't know what type to promote to!");
1509 SDNode *N = Op.getNode();
1510 LoadSDNode *LD = cast<LoadSDNode>(N);
1511 EVT MemVT = LD->getMemoryVT();
// A plain load is rebuilt as an EXTLOAD of the wider type; an extending
// load keeps its extension kind.
1512 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1513 : LD->getExtensionType();
1514 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1515 LD->getChain(), LD->getBasePtr(),
1516 MemVT, LD->getMemOperand());
1517 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1519 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1520 Result.dump(&DAG); dbgs() << '\n');
// Value result (0) gets the truncation; chain result (1) gets the new chain.
1522 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1523 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1525 AddToWorklist(Result.getNode());
1526 recursivelyDeleteUnusedNodes(N);
1533 /// Recursively delete a node which has no uses and any operands for
1534 /// which it is the only use.
1536 /// Note that this both deletes the nodes and removes them from the worklist.
1537 /// It also adds any nodes who have had a user deleted to the worklist as they
1538 /// may now have only one use and subject to other combines.
1539 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1540 if (!N->use_empty())
// Worklist of candidate dead nodes; SmallSetVector gives uniqueness plus
// deterministic pop order.
1543 SmallSetVector<SDNode *, 16> Nodes;
1546 N = Nodes.pop_back_val();
1550 if (N->use_empty()) {
// Operands may become dead once N is gone, so queue them for a look.
1551 for (const SDValue &ChildN : N->op_values())
1552 Nodes.insert(ChildN.getNode());
1554 removeFromWorklist(N)
1559 } while (!Nodes.empty());
1563 //===----------------------------------------------------------------------===//
1564 // Main DAG Combiner implementation
1565 //===----------------------------------------------------------------------===//
// Main driver: seed the worklist with every node in the DAG, then repeatedly
// pop a node, try to combine it, and propagate replacements until the
// worklist is empty.
1567 void DAGCombiner::Run(CombineLevel AtLevel) {
1568 // set the instance variables, so that the various visit routines may use it.
1570 LegalDAG = Level >= AfterLegalizeDAG;
1571 LegalOperations = Level >= AfterLegalizeVectorOps;
1572 LegalTypes = Level >= AfterLegalizeTypes;
1574 WorklistInserter AddNodes(*this);
1576 // Add all the dag nodes to the worklist.
1577 for (SDNode &Node : DAG.allnodes())
1578 AddToWorklist(&Node);
1580 // Create a dummy node (which is not added to allnodes), that adds a reference
1581 // to the root node, preventing it from being deleted, and tracking any
1582 // changes of the root.
1583 HandleSDNode Dummy(DAG.getRoot());
1585 // While we have a valid worklist entry node, try to combine it.
1586 while (SDNode *N = getNextWorklistEntry()) {
1587 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1588 // N is deleted from the DAG, since they too may now be dead or may have a
1589 // reduced number of uses, allowing other xforms.
1590 if (recursivelyDeleteUnusedNodes(N))
1593 WorklistRemover DeadNodes(*this);
1595 // If this combine is running after legalizing the DAG, re-legalize any
1596 // nodes pulled off the worklist.
1598 SmallSetVector<SDNode *, 16> UpdatedNodes;
1599 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1601 for (SDNode *LN : UpdatedNodes)
1602 AddToWorklistWithUsers(LN)
1608 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1610 // Add any operands of the new node which have not yet been combined to the
1611 // worklist as well. Because the worklist uniques things already, this
1612 // won't repeatedly process the same operand.
1613 CombinedNodes.insert(N);
1614 for (const SDValue &ChildN : N->op_values())
1615 if (!CombinedNodes.count(ChildN.getNode()))
1616 AddToWorklist(ChildN.getNode());
1618 SDValue RV = combine(N);
1625 // If we get back the same node we passed in, rather than a new node or
1626 // zero, we know that the node must have defined multiple values and
1627 // CombineTo was used. Since CombineTo takes care of the worklist
1628 // mechanics for us, we have no work to do in this case.
1629 if (RV.getNode() == N)
1632 assert(N->getOpcode() != ISD::DELETED_NODE &&
1633 RV.getOpcode() != ISD::DELETED_NODE &&
1634 "Node was deleted but visit returned new node!");
1636 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
// Multi-result nodes are replaced wholesale; single-result replacement is
// value-based and must agree on type.
1638 if (N->getNumValues() == RV->getNumValues())
1639 DAG.ReplaceAllUsesWith(N, RV.getNode());
1641 assert(N->getValueType(0) == RV.getValueType() &&
1642 N->getNumValues() == 1 && "Type mismatch");
1643 DAG.ReplaceAllUsesWith(N, &RV)
1646 // Push the new node and any users onto the worklist. Omit this if the
1647 // new node is the EntryToken (e.g. if a store managed to get optimized
1648 // out), because re-visiting the EntryToken and its users will not uncover
1649 // any additional opportunities, but there may be a large number of such
1650 // users, potentially causing compile time explosion.
1651 if (RV.getOpcode() != ISD::EntryToken) {
1652 AddToWorklist(RV.getNode());
1653 AddUsersToWorklist(RV.getNode())
1656 // Finally, if the node is now dead, remove it from the graph. The node
1657 // may not be dead if the replacement process recursively simplified to
1658 // something else needing this node. This will also take care of adding any
1659 // operands which have lost a user to the worklist.
1660 recursivelyDeleteUnusedNodes(N)
1663 // If the root changed (e.g. it was a dead load, update the root).
1664 DAG.setRoot(Dummy.getValue());
1665 DAG.RemoveDeadNodes();
// Dispatch N to the opcode-specific visit routine. Returns the replacement
// value, or a null SDValue when no generic combine applies.
1668 SDValue DAGCombiner::visit(SDNode *N) {
1669 switch (N->getOpcode()) {
1671 case ISD::TokenFactor: return visitTokenFactor(N);
1672 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1673 case ISD::ADD: return visitADD(N);
1674 case ISD::SUB: return visitSUB(N);
1676 case ISD::UADDSAT: return visitADDSAT(N);
1678 case ISD::USUBSAT: return visitSUBSAT(N);
1679 case ISD::ADDC: return visitADDC(N);
1681 case ISD::UADDO: return visitADDO(N);
1682 case ISD::SUBC: return visitSUBC(N);
1684 case ISD::USUBO: return visitSUBO(N);
1685 case ISD::ADDE: return visitADDE(N);
1686 case ISD::ADDCARRY: return visitADDCARRY(N);
1687 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1688 case ISD::SUBE: return visitSUBE(N);
1689 case ISD::SUBCARRY: return visitSUBCARRY(N);
1690 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1692 case ISD::SMULFIXSAT:
1694 case ISD::UMULFIXSAT: return visitMULFIX(N);
1695 case ISD::MUL: return visitMUL(N);
1696 case ISD::SDIV: return visitSDIV(N);
1697 case ISD::UDIV: return visitUDIV(N);
1699 case ISD::UREM: return visitREM(N);
1700 case ISD::MULHU: return visitMULHU(N);
1701 case ISD::MULHS: return visitMULHS(N);
1702 case ISD::AVGFLOORS:
1703 case ISD::AVGFLOORU:
1705 case ISD::AVGCEILU: return visitAVG(N);
1706 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1707 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1709 case ISD::UMULO: return visitMULO(N);
1713 case ISD::UMAX: return visitIMINMAX(N);
1714 case ISD::AND: return visitAND(N);
1715 case ISD::OR: return visitOR(N);
1716 case ISD::XOR: return visitXOR(N);
1717 case ISD::SHL: return visitSHL(N);
1718 case ISD::SRA: return visitSRA(N);
1719 case ISD::SRL: return visitSRL(N);
1721 case ISD::ROTL: return visitRotate(N);
1723 case ISD::FSHR: return visitFunnelShift(N);
1725 case ISD::USHLSAT: return visitSHLSAT(N);
1726 case ISD::ABS: return visitABS(N);
1727 case ISD::BSWAP: return visitBSWAP(N);
1728 case ISD::BITREVERSE: return visitBITREVERSE(N);
1729 case ISD::CTLZ: return visitCTLZ(N);
1730 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1731 case ISD::CTTZ: return visitCTTZ(N);
1732 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1733 case ISD::CTPOP: return visitCTPOP(N);
1734 case ISD::SELECT: return visitSELECT(N);
1735 case ISD::VSELECT: return visitVSELECT(N);
1736 case ISD::SELECT_CC: return visitSELECT_CC(N);
1737 case ISD::SETCC: return visitSETCC(N);
1738 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1739 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1740 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1741 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1742 case ISD::AssertSext:
1743 case ISD::AssertZext: return visitAssertExt(N);
1744 case ISD::AssertAlign: return visitAssertAlign(N);
1745 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1746 case ISD::SIGN_EXTEND_VECTOR_INREG:
1747 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1748 case ISD::TRUNCATE: return visitTRUNCATE(N);
1749 case ISD::BITCAST: return visitBITCAST(N);
1750 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1751 case ISD::FADD: return visitFADD(N);
1752 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1753 case ISD::FSUB: return visitFSUB(N);
1754 case ISD::FMUL: return visitFMUL(N);
1755 case ISD::FMA: return visitFMA(N);
1756 case ISD::FDIV: return visitFDIV(N);
1757 case ISD::FREM: return visitFREM(N);
1758 case ISD::FSQRT: return visitFSQRT(N);
1759 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1760 case ISD::FPOW: return visitFPOW(N);
1761 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1762 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1763 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1764 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1765 case ISD::FP_ROUND: return visitFP_ROUND(N);
1766 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1767 case ISD::FNEG: return visitFNEG(N);
1768 case ISD::FABS: return visitFABS(N);
1769 case ISD::FFLOOR: return visitFFLOOR(N);
1773 case ISD::FMAXIMUM: return visitFMinMax(N);
1774 case ISD::FCEIL: return visitFCEIL(N);
1775 case ISD::FTRUNC: return visitFTRUNC(N);
1776 case ISD::BRCOND: return visitBRCOND(N);
1777 case ISD::BR_CC: return visitBR_CC(N);
1778 case ISD::LOAD: return visitLOAD(N);
1779 case ISD::STORE: return visitSTORE(N);
1780 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1781 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1782 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1783 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1784 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1785 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1786 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1787 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1788 case ISD::MGATHER: return visitMGATHER(N);
1789 case ISD::MLOAD: return visitMLOAD(N);
1790 case ISD::MSCATTER: return visitMSCATTER(N);
1791 case ISD::MSTORE: return visitMSTORE(N);
1792 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1793 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1794 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1795 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1796 case ISD::FREEZE: return visitFREEZE(N);
1797 case ISD::VECREDUCE_FADD:
1798 case ISD::VECREDUCE_FMUL:
1799 case ISD::VECREDUCE_ADD:
1800 case ISD::VECREDUCE_MUL:
1801 case ISD::VECREDUCE_AND:
1802 case ISD::VECREDUCE_OR:
1803 case ISD::VECREDUCE_XOR:
1804 case ISD::VECREDUCE_SMAX:
1805 case ISD::VECREDUCE_SMIN:
1806 case ISD::VECREDUCE_UMAX:
1807 case ISD::VECREDUCE_UMIN:
1808 case ISD::VECREDUCE_FMAX:
1809 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
// Generate a case label for every VP (vector-predicated) opcode declared
// in the .def file; they all funnel through visitVPOp.
1810 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1811 #include "llvm/IR/VPIntrinsics.def"
1812 return visitVPOp(N);
// Top-level per-node combine: try the generic visit routines, then
// target-specific combines, then integer promotion, and finally CSE against
// an already-existing commuted form of the node.
1817 SDValue DAGCombiner::combine(SDNode *N) {
1819 if (!DisableGenericCombines)
1822 // If nothing happened, try a target-specific DAG combine.
1823 if (!RV.getNode()) {
1824 assert(N->getOpcode() != ISD::DELETED_NODE &&
1825 "Node was deleted but visit returned NULL!");
1827 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1828 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1830 // Expose the DAG combiner to the target combiner impls.
1831 TargetLowering::DAGCombinerInfo
1832 DagCombineInfo(DAG, Level, false, this);
1834 RV = TLI.PerformDAGCombine(N, DagCombineInfo)
1838 // If nothing happened still, try promoting the operation.
1839 if (!RV.getNode()) {
1840 switch (N->getOpcode()) {
1848 RV = PromoteIntBinOp(SDValue(N, 0))
1853 RV = PromoteIntShiftOp(SDValue(N, 0))
1855 case ISD::SIGN_EXTEND:
1856 case ISD::ZERO_EXTEND:
1857 case ISD::ANY_EXTEND:
1858 RV = PromoteExtend(SDValue(N, 0));
1861 if (PromoteLoad(SDValue(N, 0)))
1867 // If N is a commutative binary node, try to eliminate it if the commuted
1868 // version is already present in the DAG.
1869 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
1870 SDValue N0 = N->getOperand(0);
1871 SDValue N1 = N->getOperand(1);
1873 // Constant operands are canonicalized to RHS.
1874 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1875 SDValue Ops[] = {N1, N0};
1876 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1879 return SDValue(CSENode, 0)
1886 /// Given a node, return its input chain if it has one, otherwise return a null
// Chain operands are identified by having type MVT::Other. The first and
// last operands are checked first since chains conventionally appear there;
// the loop then scans any remaining interior operands.
1888 static SDValue getInputChainForNode(SDNode *N) {
1889 if (unsigned NumOps = N->getNumOperands()) {
1890 if (N->getOperand(0).getValueType() == MVT::Other)
1891 return N->getOperand(0);
1892 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1893 return N->getOperand(NumOps-1);
1894 for (unsigned i = 1; i < NumOps-1; ++i)
1895 if (N->getOperand(i).getValueType() == MVT::Other)
1896 return N->getOperand(i);
// Simplify a TokenFactor node: drop redundant chains, inline single-use
// child TokenFactors (up to TokenFactorInlineLimit), and prune operands
// that are transitively reachable through another operand's chain.
// NOTE(review): this listing omits some original lines; the comments below
// describe only the visible code.
1901 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1902 // If N has two operands, where one has an input chain equal to the other,
1903 // the 'other' chain is redundant.
1904 if (N->getNumOperands() == 2) {
1905 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1906 return N->getOperand(0);
1907 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1908 return N->getOperand(1);
1911 // Don't simplify token factors if optnone.
1912 if (OptLevel == CodeGenOpt::None)
1915 // Don't simplify the token factor if the node itself has too many operands.
1916 if (N->getNumOperands() > TokenFactorInlineLimit)
1919 // If the sole user is a token factor, we should make sure we have a
1920 // chance to merge them together. This prevents TF chains from inhibiting
1922 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1923 AddToWorklist(*(N->use_begin()));
1925 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1926 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1927 SmallPtrSet<SDNode*, 16> SeenOps;
1928 bool Changed = false; // If we should replace this token factor.
1930 // Start out with this token factor.
1933 // Iterate through token factors. The TFs grows when new token factors are
1935 for (unsigned i = 0; i < TFs.size(); ++i) {
1936 // Limit number of nodes to inline, to avoid quadratic compile times.
1937 // We have to add the outstanding Token Factors to Ops, otherwise we might
1938 // drop Ops from the resulting Token Factors.
1939 if (Ops.size() > TokenFactorInlineLimit) {
1940 for (unsigned j = i; j < TFs.size(); j++)
1941 Ops.emplace_back(TFs[j], 0);
1942 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1943 // combiner worklist later.
1948 SDNode *TF = TFs[i];
1949 // Check each of the operands.
1950 for (const SDValue &Op : TF->op_values()) {
1951 switch (Op.getOpcode()) {
1952 case ISD::EntryToken:
1953 // Entry tokens don't need to be added to the list. They are
1958 case ISD::TokenFactor:
// Only inline a child TokenFactor when this node is its sole user;
// otherwise the child must survive for its other users.
1959 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1960 // Queue up for processing.
1961 TFs.push_back(Op.getNode());
1968 // Only add if it isn't already in the list.
1969 if (SeenOps.insert(Op.getNode()).second)
1978 // Re-visit inlined Token Factors, to clean them up in case they have been
1979 // removed. Skip the first Token Factor, as this is the current node.
1980 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1981 AddToWorklist(TFs[i]);
1983 // Remove Nodes that are chained to another node in the list. Do so
1984 // by walking up chains breadth-first stopping when we've seen
1985 // another operand. In general we must climb to the EntryNode, but we can exit
1986 // early if we find all remaining work is associated with just one operand as
1987 // no further pruning is possible.
1989 // List of nodes to search through and original Ops from which they originate.
1990 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1991 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1992 SmallPtrSet<SDNode *, 16> SeenChains;
1993 bool DidPruneOps = false;
1995 unsigned NumLeftToConsider = 0;
// Seed the breadth-first search with each operand, tagged by its index.
1996 for (const SDValue &Op : Ops) {
1997 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1998 OpWorkCount.push_back(1);
// Local helper (shadows the member function of the same name): record that
// chain node Op was reached while searching on behalf of operand OpNumber.
2001 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2002 // If this is an Op, we can remove the op from the list. Remark any
2003 // search associated with it as from the current OpNumber.
2004 if (SeenOps.contains(Op)) {
2007 unsigned OrigOpNumber = 0;
2008 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2010 assert((OrigOpNumber != Ops.size()) &&
2011 "expected to find TokenFactor Operand")::
2012 // Re-mark worklist from OrigOpNumber to OpNumber
2013 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2014 if (Worklist[i].second == OrigOpNumber) {
2015 Worklist[i].second = OpNumber;
2018 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2019 OpWorkCount[OrigOpNumber] = 0;
2020 NumLeftToConsider--;
2022 // Add if it's a new chain
2023 if (SeenChains.insert(Op).second) {
2024 OpWorkCount[OpNumber]++;
2025 Worklist.push_back(std::make_pair(Op, OpNumber));
// Bounded BFS (at most 1024 nodes) climbing up the chain operands.
2029 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2030 // We need to consider at least 2 Ops to prune.
2031 if (NumLeftToConsider <= 1)
2033 auto CurNode = Worklist[i].first;
2034 auto CurOpNumber = Worklist[i].second;
2035 assert((OpWorkCount[CurOpNumber] > 0) &&
2036 "Node should not appear in worklist");
2037 switch (CurNode->getOpcode()) {
2038 case ISD::EntryToken:
2039 // Hitting EntryToken is the only way for the search to terminate without
2041 // another operand's search. Prevent us from marking this operand
2043 NumLeftToConsider++;
2045 case ISD::TokenFactor:
2046 for (const SDValue &Op : CurNode->op_values())
2047 AddToWorklist(i, Op.getNode(), CurOpNumber);
2049 case ISD::LIFETIME_START:
2050 case ISD::LIFETIME_END:
2051 case ISD::CopyFromReg:
2052 case ISD::CopyToReg:
// These nodes carry their chain in operand 0; continue the climb there.
2053 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2056 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2057 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2060 OpWorkCount[CurOpNumber]--;
2061 if (OpWorkCount[CurOpNumber] == 0)
2062 NumLeftToConsider--;
2065 // If we've changed things around then replace token factor.
2069 // The entry token is the only possible outcome.
2070 Result = DAG.getEntryNode();
2073 SmallVector<SDValue, 8> PrunedOps;
// Keep only operands that were never reached through another operand's
// chain walk; the rest are redundant.
2075 for (const SDValue &Op : Ops) {
2076 if (SeenChains.count(Op.getNode()) == 0)
2077 PrunedOps.push_back(Op);
2079 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2081 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2089 /// MERGE_VALUES can always be eliminated.
// Each result of a MERGE_VALUES is just its corresponding operand, so the
// node is removed by rewriting every use of result i to operand i.
2090 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2091 WorklistRemover DeadNodes(*this);
2092 // Replacing results may cause a different MERGE_VALUES to suddenly
2093 // be CSE'd with N, and carry its uses with it. Iterate until no
2094 // uses remain, to ensure that the node can be safely deleted.
2095 // First add the users of this node to the work list so that they
2096 // can be tried again once they have new operands.
2097 AddUsersToWorklist(N);
2099 // Do as a single replacement to avoid rewalking use lists.
2100 SmallVector<SDValue, 8> Ops;
2101 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2102 Ops.push_back(N->getOperand(i));
2103 DAG.ReplaceAllUsesWith(N, Ops.data());
2104 } while (!N->use_empty());
2105 deleteAndRecombine(N);
2106 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2109 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2110 /// ConstantSDNode pointer else nullptr.
// Opaque constants are deliberately kept out of constant folding, so they
// are treated here the same as non-constants.
2111 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2112 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2113 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2116 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2117 /// and that N may be folded in the load / store addressing mode.
// The four branches below extract the memory VT and address space from the
// supported memory users (plain and masked loads/stores); indexed accesses
// and uses where N is not the base pointer are rejected.
2118 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2119 const TargetLowering &TLI) {
2123 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2124 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2126 VT = LD->getMemoryVT();
2127 AS = LD->getAddressSpace();
2128 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2129 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2131 VT = ST->getMemoryVT();
2132 AS = ST->getAddressSpace();
2133 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2134 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2136 VT = LD->getMemoryVT();
2137 AS = LD->getAddressSpace();
2138 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2139 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2141 VT = ST->getMemoryVT();
2142 AS = ST->getAddressSpace();
// Build an AddrMode describing N (base register plus an optional constant
// offset from an ADD/SUB) and ask the target whether it is legal.
2147 TargetLowering::AddrMode AM;
2148 if (N->getOpcode() == ISD::ADD) {
2149 AM.HasBaseReg = true;
2150 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2153 AM.BaseOffs = Offset->getSExtValue();
2157 } else if (N->getOpcode() == ISD::SUB) {
2158 AM.HasBaseReg = true;
2159 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
// SUB of a constant is the same as ADD of its negation.
2162 AM.BaseOffs = -Offset->getSExtValue();
2170 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2171 VT.getTypeForEVT(*DAG.getContext()), AS);
2174 /// This inverts a canonicalization in IR that replaces a variable select arm
2175 /// with an identity constant. Codegen improves if we re-use the variable
2176 /// operand rather than load a constant. This can also be converted into a
2177 /// masked vector operation if the target supports it.
// Transform: binop N0, (vselect Cond, IDC, V) --> vselect Cond, N0, (binop N0, V)
// (and the symmetric form when the identity constant is the false arm).
2178 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2179 bool ShouldCommuteOperands) {
2180 // Match a select as operand 1. The identity constant that we are looking for
2181 // is only valid as operand 1 of a non-commutative binop.
2182 SDValue N0 = N->getOperand(0);
2183 SDValue N1 = N->getOperand(1);
2184 if (ShouldCommuteOperands)
2187 // TODO: Should this apply to scalar select too?
2188 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2191 unsigned Opcode = N->getOpcode();
2192 EVT VT = N->getValueType(0);
2193 SDValue Cond = N1.getOperand(0);
2194 SDValue TVal = N1.getOperand(1);
2195 SDValue FVal = N1.getOperand(2);
2197 // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2198 // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2199 // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
// Returns true if V is the right-identity constant for Opcode, i.e.
// "x Opcode V == x" for all x.
2200 auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2201 if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
2203 case ISD::FADD: // X + -0.0 --> X
2204 return C->isZero() && C->isNegative();
2205 case ISD::FSUB: // X - 0.0 --> X
2206 return C->isZero() && !C->isNegative();
2207 case ISD::FMUL: // X * 1.0 --> X
2208 case ISD::FDIV: // X / 1.0 --> X
2209 return C->isExactlyValue(1.0);
2212 if (ConstantSDNode *C = isConstOrConstSplat(V)) {
2214 case ISD::ADD: // X + 0 --> X
2215 case ISD::SUB: // X - 0 --> X
2216 case ISD::SHL: // X << 0 --> X
2217 case ISD::SRA: // X s>> 0 --> X
2218 case ISD::SRL: // X u>> 0 --> X
2220 case ISD::MUL: // X * 1 --> X
2227 // This transform increases uses of N0, so freeze it to be safe.
2228 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2229 if (isIdentityConstantForOpcode(Opcode, TVal)) {
2230 SDValue F0 = DAG.getFreeze(N0);
2231 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2232 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2234 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2235 if (isIdentityConstantForOpcode(Opcode, FVal)) {
2236 SDValue F0 = DAG.getFreeze(N0);
2237 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2238 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
// Fold "binop (select Cond, CT, CF), CBO" into
// "select Cond, (binop CT, CBO), (binop CF, CBO)" when both arms constant-fold,
// eliminating the binop. Also tries foldSelectWithIdentityConstant first.
// NOTE(review): this listing omits some original lines; the comments below
// describe only the visible code.
2244 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2245 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2246 "Unexpected binary operator");
2248 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2249 auto BinOpcode = BO->getOpcode();
2250 EVT VT = BO->getValueType(0);
2251 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2252 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
// For commutative ops, also try with the operands swapped.
2255 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2256 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2260 // Don't do this unless the old select is going away. We want to eliminate the
2261 // binary operator, not replace a binop with a select.
2262 // TODO: Handle ISD::SELECT_CC.
2263 unsigned SelOpNo = 0;
2264 SDValue Sel = BO->getOperand(0);
2265 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2267 Sel = BO->getOperand(1);
2270 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
// Both select arms must be (FP or integer) constants for the fold to
// remove the binop... except for the CanFoldNonConst case below.
2273 SDValue CT = Sel.getOperand(1);
2274 if (!isConstantOrConstantVector(CT, true) &&
2275 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2278 SDValue CF = Sel.getOperand(2);
2279 if (!isConstantOrConstantVector(CF, true) &&
2280 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2283 // Bail out if any constants are opaque because we can't constant fold those.
2284 // The exception is "and" and "or" with either 0 or -1 in which case we can
2285 // propagate non constant operands into select. I.e.:
2286 // and (select Cond, 0, -1), X --> select Cond, 0, X
2287 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2288 bool CanFoldNonConst =
2289 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2290 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2291 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2293 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2294 if (!CanFoldNonConst &&
2295 !isConstantOrConstantVector(CBO, true) &&
2296 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2299 // We have a select-of-constants followed by a binary operator with a
2300 // constant. Eliminate the binop by pulling the constant math into the select.
2301 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
// SelOpNo records which binop operand held the select; preserve operand
// order for non-commutative opcodes.
2303 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2304 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2305 if (!CanFoldNonConst && !NewCT.isUndef() &&
2306 !isConstantOrConstantVector(NewCT, true) &&
2307 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2310 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2311 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2312 if (!CanFoldNonConst && !NewCF.isUndef() &&
2313 !isConstantOrConstantVector(NewCF, true) &&
2314 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2317 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
// Propagate the binop's fast-math/overflow flags onto the new select.
2318 SelectOp->setFlags(BO->getFlags());
// Fold add/sub of a zero-extended "low bit is clear" test into a sub/add of
// the low bit itself with an adjusted constant:
//   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
//   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2322 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2323 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2324 "Expecting add or sub");
2326 // Match a constant operand and a zext operand for the math instruction:
// For ADD the constant is canonicalized to the RHS; for SUB the pattern of
// interest is "C - zext(...)", so the constant is the LHS.
2329 bool IsAdd = N->getOpcode() == ISD::ADD;
2330 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2331 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2332 auto *CN = dyn_cast<ConstantSDNode>(C);
2333 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2336 // Match the zext operand as a setcc of a boolean.
2337 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2338 Z.getOperand(0).getValueType() != MVT::i1)
2341 // Match the compare as: setcc (X & 1), 0, eq.
2342 SDValue SetCC = Z.getOperand(0);
2343 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2344 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2345 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2346 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2349 // We are adding/subtracting a constant and an inverted low bit. Turn that
2350 // into a subtract/add of the low bit with incremented/decremented constant:
2351 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2352 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2353 EVT VT = C.getValueType();
2355 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2356 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2357 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2358 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2361 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2362 /// a shift and add with a different constant.
// Patterns handled (where the shift amount isolates the sign bit):
//   add (srl (not X), BW-1), C --> add (sra X, BW-1), (C + 1)
//   sub C, (srl (not X), BW-1) --> add (srl X, BW-1), (C - 1)
2363 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2364 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2365 "Expecting add or sub");
2367 // We need a constant operand for the add/sub, and the other operand is a
2368 // logical shift right: add (srl), C or sub C, (srl).
2369 bool IsAdd = N->getOpcode() == ISD::ADD;
2370 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2371 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2372 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2373 ShiftOp.getOpcode() != ISD::SRL)
2376 // The shift must be of a 'not' value.
2377 SDValue Not = ShiftOp.getOperand(0);
2378 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2381 // The shift must be moving the sign bit to the least-significant-bit.
2382 EVT VT = ShiftOp.getValueType();
2383 SDValue ShAmt = ShiftOp.getOperand(1);
2384 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2385 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2388 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2389 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2390 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
// FoldConstantArithmetic computes C+1 (for add) or C-1 (for sub, via
// SUB C, 1); it can fail for opaque constants, in which case we bail.
2392 if (SDValue NewC = DAG.FoldConstantArithmetic(
2393 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2394 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2395 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2396 Not.getOperand(0), ShAmt);
2397 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
// Return true if V behaves exactly like an ADD of its two operands:
// an OR with no common set bits (disjoint OR), or an XOR with the minimum
// signed constant (flipping only the sign bit, which is carry-free).
2403 static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2404 unsigned Opcode = V.getOpcode();
2405 if (Opcode == ISD::OR)
2406 return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2407 if (Opcode == ISD::XOR)
2408 return isMinSignedConstant(V.getOperand(1));
2412 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2413 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2414 /// are no common bits set in the operands).
// NOTE(review): this listing omits some original lines; the comments below
// describe only the visible code.
2415 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2416 SDValue N0 = N->getOperand(0);
2417 SDValue N1 = N->getOperand(1);
2418 EVT VT = N0.getValueType();
2421 // fold (add x, undef) -> undef
2427 // fold (add c1, c2) -> c1+c2
2428 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2431 // canonicalize constant to RHS
2432 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2433 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2434 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2437 if (VT.isVector()) {
2438 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2441 // fold (add x, 0) -> x, vector edition
2442 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2446 // fold (add x, 0) -> x
2447 if (isNullConstant(N1))
2450 if (N0.getOpcode() == ISD::SUB) {
2451 SDValue N00 = N0.getOperand(0);
2452 SDValue N01 = N0.getOperand(1);
2454 // fold ((A-c1)+c2) -> (A+(c2-c1))
2455 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2456 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2458 // fold ((c1-A)+c2) -> (c1+c2)-A
2459 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2460 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2463 // add (sext i1 X), 1 -> zext (not i1 X)
2464 // We don't transform this pattern:
2465 // add (zext i1 X), -1 -> sext (not i1 X)
2466 // because most (?) targets generate better code for the zext form.
2467 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2468 isOneOrOneSplat(N1)) {
2469 SDValue X = N0.getOperand(0);
2470 if ((!LegalOperations ||
2471 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2472 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2473 X.getScalarValueSizeInBits() == 1) {
2474 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2475 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2479 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2480 // iff (or x, c0) is equivalent to (add x, c0).
2481 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2482 // iff (xor x, c0) is equivalent to (add x, c0).
2483 if (isADDLike(N0, DAG)) {
2484 SDValue N01 = N0.getOperand(1);
2485 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2486 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2489 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Reassociation is skipped when it would break a (base + offset) pattern
// that the target could fold into an addressing mode.
2493 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2494 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2497 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2498 // equivalent to (add x, c).
2499 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2500 // equivalent to (add x, c).
2501 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2502 if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2503 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2504 return DAG.getNode(ISD::ADD, DL, VT,
2505 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2510 if (SDValue Add = ReassociateAddOr(N0, N1))
2512 if (SDValue Add = ReassociateAddOr(N1, N0))
2515 // fold ((0-A) + B) -> B-A
2516 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2517 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2519 // fold (A + (0-B)) -> A-B
2520 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2521 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2523 // fold (A+(B-A)) -> B
2524 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2525 return N1.getOperand(0);
2527 // fold ((B-A)+A) -> B
2528 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2529 return N0.getOperand(0);
2531 // fold ((A-B)+(C-A)) -> (C-B)
2532 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2533 N0.getOperand(0) == N1.getOperand(1))
2534 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2537 // fold ((A-B)+(B-C)) -> (A-C)
2538 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2539 N0.getOperand(1) == N1.getOperand(0))
2540 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2543 // fold (A+(B-(A+C))) to (B-C)
2544 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2545 N0 == N1.getOperand(1).getOperand(0))
2546 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2547 N1.getOperand(1).getOperand(1));
2549 // fold (A+(B-(C+A))) to (B-C)
2550 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2551 N0 == N1.getOperand(1).getOperand(1))
2552 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2553 N1.getOperand(1).getOperand(0));
2555 // fold (A+((B-A)+or-C)) to (B+or-C)
2556 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2557 N1.getOperand(0).getOpcode() == ISD::SUB &&
2558 N0 == N1.getOperand(0).getOperand(1))
2559 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2562 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2563 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2564 N0->hasOneUse() && N1->hasOneUse()) {
2565 SDValue N00 = N0.getOperand(0);
2566 SDValue N01 = N0.getOperand(1);
2567 SDValue N10 = N1.getOperand(0);
2568 SDValue N11 = N1.getOperand(1);
2570 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2571 return DAG.getNode(ISD::SUB, DL, VT,
2572 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2573 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2576 // fold (add (umax X, C), -C) --> (usubsat X, C)
2577 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Match element-wise: the umax constant must be the negation of the add
// constant (undef lanes are allowed in either splat).
2578 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2579 return (!Max && !Op) ||
2580 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2582 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2583 /*AllowUndefs*/ true))
2584 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2588 if (SimplifyDemandedBits(SDValue(N, 0)))
2589 return SDValue(N, 0);
2591 if (isOneOrOneSplat(N1)) {
2592 // fold (add (xor a, -1), 1) -> (sub 0, a)
2593 if (isBitwiseNot(N0))
2594 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2597 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2598 if (N0.getOpcode() == ISD::ADD) {
// Find which inner-add operand is the bitwise-not; A is the other one.
2601 if (isBitwiseNot(N0.getOperand(0))) {
2602 A = N0.getOperand(1);
2603 Xor = N0.getOperand(0);
2604 } else if (isBitwiseNot(N0.getOperand(1))) {
2605 A = N0.getOperand(0);
2606 Xor = N0.getOperand(1);
2610 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2614 // add (add x, y), 1
2615 // And if the target does not like this form then turn into:
2616 // sub y, (xor x, -1)
2617 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2619 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2620 DAG.getAllOnesConstant(DL, VT));
2621 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2625 // (x - y) + -1 -> add (xor y, -1), x
2626 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2627 isAllOnesOrAllOnesSplat(N1)) {
2628 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2629 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
// Try the operand-order-sensitive combines in both directions.
2632 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2635 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
// Combine an ISD::ADD node: generic add-like folds first, then the
// bool-masked and sign-bit folds, disjoint-bits add->or, and
// VSCALE / STEP_VECTOR constant merging.
2641 SDValue DAGCombiner::visitADD(SDNode *N) {
2642 SDValue N0 = N->getOperand(0);
2643 SDValue N1 = N->getOperand(1);
2644 EVT VT = N0.getValueType();
2647 if (SDValue Combined = visitADDLike(N))
2650 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2653 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2656 // fold (a+b) -> (a|b) iff a and b share no bits.
2657 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2658 DAG.haveNoCommonBitsSet(N0, N1))
2659 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2661 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2662 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2663 const APInt &C0 = N0->getConstantOperandAPInt(0);
2664 const APInt &C1 = N1->getConstantOperandAPInt(0);
2665 return DAG.getVScale(DL, VT, C0 + C1);
2668 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2669 if ((N0.getOpcode() == ISD::ADD) &&
2670 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2671 (N1.getOpcode() == ISD::VSCALE)) {
2672 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2673 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2674 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2675 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2678 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2679 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2680 N1.getOpcode() == ISD::STEP_VECTOR) {
2681 const APInt &C0 = N0->getConstantOperandAPInt(0);
2682 const APInt &C1 = N1->getConstantOperandAPInt(0);
2683 APInt NewStep = C0 + C1;
2684 return DAG.getStepVector(DL, VT, NewStep);
2687 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2688 if ((N0.getOpcode() == ISD::ADD) &&
2689 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2690 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2691 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2692 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2693 APInt NewStep = SV0 + SV1;
2694 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2695 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
// Combine saturating-add nodes (SADDSAT/UADDSAT): constant folding,
// canonicalization, identity folds, and strength-reduction to a plain ADD
// when overflow is provably impossible.
2701 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2702 unsigned Opcode = N->getOpcode();
2703 SDValue N0 = N->getOperand(0);
2704 SDValue N1 = N->getOperand(1);
2705 EVT VT = N0.getValueType();
2708 // fold (add_sat x, undef) -> -1
2709 if (N0.isUndef() || N1.isUndef())
2710 return DAG.getAllOnesConstant(DL, VT);
2712 // fold (add_sat c1, c2) -> c3
2713 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2716 // canonicalize constant to RHS
2717 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2718 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2719 return DAG.getNode(Opcode, DL, VT, N1, N0);
2722 if (VT.isVector()) {
2723 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2726 // fold (add_sat x, 0) -> x, vector edition
2727 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2731 // fold (add_sat x, 0) -> x
2732 if (isNullConstant(N1))
2735 // If it cannot overflow, transform into an add.
2736 if (Opcode == ISD::UADDSAT)
2737 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2738 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
// Peel wrapper nodes off V and return the underlying carry-out value (result
// 1 of ADDCARRY/SUBCARRY/UADDO/USUBO) if V is one, else a null SDValue.
2743 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2744 bool Masked = false;
2746 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2748 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2749 V = V.getOperand(0);
// An AND with 1 explicitly masks the value to a single bit.
2753 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2755 V = V.getOperand(0);
2762 // If this is not a carry, return.
2763 if (V.getResNo() != 1)
2766 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2767 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
// Only use the carry when the producing operation is legal or custom for
// its value type.
2770 EVT VT = V->getValueType(0);
2771 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2774 // If the result is masked, then no matter what kind of bool it is we can
2775 // return. If it isn't, then we need to make sure the bool type is either 0 or
2776 // 1 and not other values.
2778 TLI.getBooleanContents(V.getValueType()) ==
2779 TargetLoweringBase::ZeroOrOneBooleanContent)
2785 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2786 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2787 /// the opcode and bypass the mask operation.
2788 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2789 SelectionDAG &DAG, const SDLoc &DL) {
2790 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
// All sign bits set means the unmasked source is known to be 0 or -1,
// so (X & 1) equals -(-X): add becomes sub of the source and vice versa.
2793 EVT VT = N0.getValueType();
2794 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2797 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2798 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2799 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2802 /// Helper for doing combines based on N0 and N1 being added to each other.
2803 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2804 SDNode *LocReference) {
2805 EVT VT = N0.getValueType();
2806 SDLoc DL(LocReference);
2808 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2809 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2810 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2811 return DAG.getNode(ISD::SUB, DL, VT, N0,
2812 DAG.getNode(ISD::SHL, DL, VT,
2813 N1.getOperand(0).getOperand(1),
2816 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2820 // add (add x, 1), y
2821 // And if the target does not like this form then turn into:
2822 // sub y, (xor x, -1)
2823 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2824 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2825 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2826 DAG.getAllOnesConstant(DL, VT));
2827 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2830 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2831 // Hoist one-use subtraction by non-opaque constant:
2832 // (x - C) + y -> (x + y) - C
2833 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2834 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2835 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2836 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2838 // Hoist one-use subtraction from non-opaque constant:
2839 // (C - x) + y -> (y - x) + C
2840 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2841 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2842 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2846 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2847 // rather than 'add 0/-1' (the zext should get folded).
2848 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2849 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2850 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2851 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2852 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2853 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2856 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2857 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2858 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2859 if (TN->getVT() == MVT::i1) {
2860 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2861 DAG.getConstant(1, DL, VT));
2862 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2866 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2867 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2869 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2870 N0, N1.getOperand(0), N1.getOperand(2));
2872 // (add X, Carry) -> (addcarry X, 0, Carry)
2873 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2874 if (SDValue Carry = getAsCarry(TLI, N1))
2875 return DAG.getNode(ISD::ADDCARRY, DL,
2876 DAG.getVTList(VT, Carry.getValueType()), N0,
2877 DAG.getConstant(0, DL, VT), Carry);
2882 SDValue DAGCombiner::visitADDC(SDNode *N) {
2883 SDValue N0 = N->getOperand(0);
2884 SDValue N1 = N->getOperand(1);
2885 EVT VT = N0.getValueType();
2888 // If the flag result is dead, turn this into an ADD.
2889 if (!N->hasAnyUseOfValue(1))
2890 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2891 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2893 // canonicalize constant to RHS.
2894 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2895 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2899 // fold (addc x, 0) -> x + no carry out
2900 if (isNullConstant(N1))
2901 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2904 // If it cannot overflow, transform into an add.
2905 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2906 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2907 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2913 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2914 * then the flip also occurs if computing the inverse is the same cost.
2915 * This function returns an empty SDValue in case it cannot flip the boolean
2916 * without increasing the cost of the computation. If you want to flip a boolean
2917 * no matter what, use DAG.getLogicalNOT.
2919 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2920 const TargetLowering &TLI,
2922 if (Force && isa<ConstantSDNode>(V))
2923 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2925 if (V.getOpcode() != ISD::XOR)
2928 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2932 EVT VT = V.getValueType();
2934 bool IsFlip = false;
2935 switch(TLI.getBooleanContents(VT)) {
2936 case TargetLowering::ZeroOrOneBooleanContent:
2937 IsFlip = Const->isOne();
2939 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2940 IsFlip = Const->isAllOnes();
2942 case TargetLowering::UndefinedBooleanContent:
2943 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2948 return V.getOperand(0);
2950 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2954 SDValue DAGCombiner::visitADDO(SDNode *N) {
2955 SDValue N0 = N->getOperand(0);
2956 SDValue N1 = N->getOperand(1);
2957 EVT VT = N0.getValueType();
2958 bool IsSigned = (ISD::SADDO == N->getOpcode());
2960 EVT CarryVT = N->getValueType(1);
2963 // If the flag result is dead, turn this into an ADD.
2964 if (!N->hasAnyUseOfValue(1))
2965 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2966 DAG.getUNDEF(CarryVT));
2968 // canonicalize constant to RHS.
2969 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2970 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2971 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2973 // fold (addo x, 0) -> x + no carry out
2974 if (isNullOrNullSplat(N1))
2975 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2978 // If it cannot overflow, transform into an add.
2979 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2980 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2981 DAG.getConstant(0, DL, CarryVT));
2983 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2984 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2985 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2986 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2988 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2991 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2994 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3001 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3002 EVT VT = N0.getValueType();
3006 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
3007 // If Y + 1 cannot overflow.
3008 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
3009 SDValue Y = N1.getOperand(0);
3010 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3011 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
3012 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
3016 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
3017 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
3018 if (SDValue Carry = getAsCarry(TLI, N1))
3019 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
3020 DAG.getConstant(0, SDLoc(N), VT), Carry);
3025 SDValue DAGCombiner::visitADDE(SDNode *N) {
3026 SDValue N0 = N->getOperand(0);
3027 SDValue N1 = N->getOperand(1);
3028 SDValue CarryIn = N->getOperand(2);
3030 // canonicalize constant to RHS
3031 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3032 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3034 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3037 // fold (adde x, y, false) -> (addc x, y)
3038 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3039 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3044 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3045 SDValue N0 = N->getOperand(0);
3046 SDValue N1 = N->getOperand(1);
3047 SDValue CarryIn = N->getOperand(2);
3050 // canonicalize constant to RHS
3051 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3052 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3054 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3056 // fold (addcarry x, y, false) -> (uaddo x, y)
3057 if (isNullConstant(CarryIn)) {
3058 if (!LegalOperations ||
3059 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3060 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3063 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3064 if (isNullConstant(N0) && isNullConstant(N1)) {
3065 EVT VT = N0.getValueType();
3066 EVT CarryVT = CarryIn.getValueType();
3067 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3068 AddToWorklist(CarryExt.getNode());
3069 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3070 DAG.getConstant(1, DL, VT)),
3071 DAG.getConstant(0, DL, CarryVT));
3074 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3077 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3083 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3084 SDValue N0 = N->getOperand(0);
3085 SDValue N1 = N->getOperand(1);
3086 SDValue CarryIn = N->getOperand(2);
3089 // canonicalize constant to RHS
3090 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3091 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3093 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3095 // fold (saddo_carry x, y, false) -> (saddo x, y)
3096 if (isNullConstant(CarryIn)) {
3097 if (!LegalOperations ||
3098 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3099 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3106 * If we are facing some sort of diamond carry propapagtion pattern try to
3107 * break it up to generate something like:
3108 * (addcarry X, 0, (addcarry A, B, Z):Carry)
3110 * The end result is usually an increase in operation required, but because the
3111 * carry is now linearized, other tranforms can kick in and optimize the DAG.
3113 * Patterns typically look something like
3118 * | (addcarry *, 0, Z)
3122 * (addcarry X, *, *)
3124 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3125 * produce a combine with a single path for carry propagation.
3127 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3128 SDValue X, SDValue Carry0, SDValue Carry1,
3130 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3132 if (Carry1.getOpcode() != ISD::UADDO)
3138 * First look for a suitable Z. It will present itself in the form of
3139 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3141 if (Carry0.getOpcode() == ISD::ADDCARRY &&
3142 isNullConstant(Carry0.getOperand(1))) {
3143 Z = Carry0.getOperand(2);
3144 } else if (Carry0.getOpcode() == ISD::UADDO &&
3145 isOneConstant(Carry0.getOperand(1))) {
3146 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3147 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3149 // We couldn't find a suitable Z.
3154 auto cancelDiamond = [&](SDValue A,SDValue B) {
3156 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3157 Combiner.AddToWorklist(NewY.getNode());
3158 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3159 DAG.getConstant(0, DL, X.getValueType()),
3168 * (addcarry *, 0, Z)
3170 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3171 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3175 * (addcarry A, 0, Z)
3181 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3182 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3185 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3186 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3192 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3193 // match patterns like:
3195 // (uaddo A, B) CarryIn
3198 // PartialSum PartialCarryOutX /
3200 // | ____|____________/
3202 // (uaddo *, *) \________
3205 // | PartialCarryOutY |
3208 // AddCarrySum | ______/
3210 // CarryOut = (or *, *)
3212 // And generate ADDCARRY (or SUBCARRY) with two result values:
3214 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3216 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3217 // a single path for carry/borrow out propagation:
3218 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3219 SDValue N0, SDValue N1, SDNode *N) {
3220 SDValue Carry0 = getAsCarry(TLI, N0);
3223 SDValue Carry1 = getAsCarry(TLI, N1);
3227 unsigned Opcode = Carry0.getOpcode();
3228 if (Opcode != Carry1.getOpcode())
3230 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3233 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3234 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3235 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3236 std::swap(Carry0, Carry1);
3238 // Check if nodes are connected in expected way.
3239 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3240 Carry1.getOperand(1) != Carry0.getValue(0))
3243 // The carry in value must be on the righthand side for subtraction.
3244 unsigned CarryInOperandNum =
3245 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3246 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3248 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3250 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3251 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3254 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3255 // TODO: make getAsCarry() aware of how partial carries are merged.
3256 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3258 CarryIn = CarryIn.getOperand(0);
3259 if (CarryIn.getValueType() != MVT::i1)
3264 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3265 Carry0.getOperand(1), CarryIn);
3267 // Please note that because we have proven that the result of the UADDO/USUBO
3268 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3269 // therefore prove that if the first UADDO/USUBO overflows, the second
3270 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3273 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3274 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3276 // This is important because it means that OR and XOR can be used to merge
3277 // carry flags; and that AND can return a constant zero.
3279 // TODO: match other operations that can merge flags (ADD, etc)
3280 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3281 if (N->getOpcode() == ISD::AND)
3282 return DAG.getConstant(0, DL, MVT::i1);
3283 return Merged.getValue(1);
3286 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3288 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3289 if (isBitwiseNot(N0))
3290 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3292 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3293 N0.getOperand(0), NotC);
3295 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3298 // Iff the flag result is dead:
3299 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3300 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3301 // or the dependency between the instructions.
3302 if ((N0.getOpcode() == ISD::ADD ||
3303 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3304 N0.getValue(1) != CarryIn)) &&
3305 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3306 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3307 N0.getOperand(0), N0.getOperand(1), CarryIn);
3310 * When one of the addcarry argument is itself a carry, we may be facing
3311 * a diamond carry propagation. In which case we try to transform the DAG
3312 * to ensure linear carry propagation if that is possible.
3314 if (auto Y = getAsCarry(TLI, N1)) {
3315 // Because both are carries, Y and Z can be swapped.
3316 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3318 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3325 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3326 // clamp/truncation if necessary.
3327 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3328 SDValue RHS, SelectionDAG &DAG,
3330 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3331 "Illegal truncation");
3334 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3336 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3338 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3339 DstVT.getScalarSizeInBits());
3340 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3344 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3345 DstVT.getScalarSizeInBits()),
3347 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3348 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3349 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3350 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3353 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3354 // usubsat(a,b), optionally as a truncated type.
3355 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3356 if (N->getOpcode() != ISD::SUB ||
3357 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3360 EVT SubVT = N->getValueType(0);
3361 SDValue Op0 = N->getOperand(0);
3362 SDValue Op1 = N->getOperand(1);
3364 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3365 // they may be converted to usubsat(a,b).
3366 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3367 SDValue MaxLHS = Op0.getOperand(0);
3368 SDValue MaxRHS = Op0.getOperand(1);
3370 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3372 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3375 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3376 SDValue MinLHS = Op1.getOperand(0);
3377 SDValue MinRHS = Op1.getOperand(1);
3379 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3381 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3384 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3385 if (Op1.getOpcode() == ISD::TRUNCATE &&
3386 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3387 Op1.getOperand(0).hasOneUse()) {
3388 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3389 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3390 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3391 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3393 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3394 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3401 // Since it may not be valid to emit a fold to zero for vector initializers
3402 // check if we can before folding.
3403 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3404 SelectionDAG &DAG, bool LegalOperations) {
3406 return DAG.getConstant(0, DL, VT);
3407 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3408 return DAG.getConstant(0, DL, VT);
3412 SDValue DAGCombiner::visitSUB(SDNode *N) {
3413 SDValue N0 = N->getOperand(0);
3414 SDValue N1 = N->getOperand(1);
3415 EVT VT = N0.getValueType();
3418 auto PeekThroughFreeze = [](SDValue N) {
3419 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3420 return N->getOperand(0);
3424 // fold (sub x, x) -> 0
3425 // FIXME: Refactor this and xor and other similar operations together.
3426 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3427 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3429 // fold (sub c1, c2) -> c3
3430 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3434 if (VT.isVector()) {
3435 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3438 // fold (sub x, 0) -> x, vector edition
3439 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3443 if (SDValue NewSel = foldBinOpIntoSelect(N))
3446 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3448 // fold (sub x, c) -> (add x, -c)
3450 return DAG.getNode(ISD::ADD, DL, VT, N0,
3451 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3454 if (isNullOrNullSplat(N0)) {
3455 unsigned BitWidth = VT.getScalarSizeInBits();
3456 // Right-shifting everything out but the sign bit followed by negation is
3457 // the same as flipping arithmetic/logical shift type without the negation:
3458 // -(X >>u 31) -> (X >>s 31)
3459 // -(X >>s 31) -> (X >>u 31)
3460 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3461 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3462 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3463 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3464 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3465 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3469 // 0 - X --> 0 if the sub is NUW.
3470 if (N->getFlags().hasNoUnsignedWrap())
3473 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3474 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3475 // N1 must be 0 because negating the minimum signed value is undefined.
3476 if (N->getFlags().hasNoSignedWrap())
3479 // 0 - X --> X if X is 0 or the minimum signed value.
3483 // Convert 0 - abs(x).
3484 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3485 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3486 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3489 // Fold neg(splat(neg(x)) -> splat(x)
3490 if (VT.isVector()) {
3491 SDValue N1S = DAG.getSplatValue(N1, true);
3492 if (N1S && N1S.getOpcode() == ISD::SUB &&
3493 isNullConstant(N1S.getOperand(0))) {
3494 if (VT.isScalableVector())
3495 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3496 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3501 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3502 if (isAllOnesOrAllOnesSplat(N0))
3503 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3505 // fold (A - (0-B)) -> A+B
3506 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3507 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3509 // fold A-(A-B) -> B
3510 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3511 return N1.getOperand(1);
3513 // fold (A+B)-A -> B
3514 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3515 return N0.getOperand(1);
3517 // fold (A+B)-B -> A
3518 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3519 return N0.getOperand(0);
3521 // fold (A+C1)-C2 -> A+(C1-C2)
3522 if (N0.getOpcode() == ISD::ADD) {
3523 SDValue N01 = N0.getOperand(1);
3524 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3525 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3528 // fold C2-(A+C1) -> (C2-C1)-A
3529 if (N1.getOpcode() == ISD::ADD) {
3530 SDValue N11 = N1.getOperand(1);
3531 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3532 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3535 // fold (A-C1)-C2 -> A-(C1+C2)
3536 if (N0.getOpcode() == ISD::SUB) {
3537 SDValue N01 = N0.getOperand(1);
3538 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3539 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3542 // fold (c1-A)-c2 -> (c1-c2)-A
3543 if (N0.getOpcode() == ISD::SUB) {
3544 SDValue N00 = N0.getOperand(0);
3545 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3546 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3549 // fold ((A+(B+or-C))-B) -> A+or-C
3550 if (N0.getOpcode() == ISD::ADD &&
3551 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3552 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3553 N0.getOperand(1).getOperand(0) == N1)
3554 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3555 N0.getOperand(1).getOperand(1));
3557 // fold ((A+(C+B))-B) -> A+C
3558 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3559 N0.getOperand(1).getOperand(1) == N1)
3560 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3561 N0.getOperand(1).getOperand(0));
3563 // fold ((A-(B-C))-C) -> A-B
3564 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3565 N0.getOperand(1).getOperand(1) == N1)
3566 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3567 N0.getOperand(1).getOperand(0));
3569 // fold (A-(B-C)) -> A+(C-B)
3570 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3571 return DAG.getNode(ISD::ADD, DL, VT, N0,
3572 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3575 // A - (A & B) -> A & (~B)
3576 if (N1.getOpcode() == ISD::AND) {
3577 SDValue A = N1.getOperand(0);
3578 SDValue B = N1.getOperand(1);
3582 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3584 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3585 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3589 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3590 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3591 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3592 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3593 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3594 N1.getOperand(0).getOperand(1),
3596 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3598 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3599 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3600 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3602 N1.getOperand(1).getOperand(1));
3603 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3607 // If either operand of a sub is undef, the result is undef
3613 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3616 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3619 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3622 if (SDValue V = foldSubToUSubSat(VT, N))
3625 // (x - y) - 1 -> add (xor y, -1), x
3626 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3627 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3628 DAG.getAllOnesConstant(DL, VT));
3629 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3633 // sub y, (xor x, -1)
3634 // And if the target does not like this form then turn into:
3635 // add (add x, y), 1
3636 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3637 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3638 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3641 // Hoist one-use addition by non-opaque constant:
3642 // (x + C) - y -> (x - y) + C
3643 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3644 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3645 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3646 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3648 // y - (x + C) -> (y - x) - C
3649 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3650 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3651 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3652 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3654 // (x - C) - y -> (x - y) - C
3655 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3656 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3657 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3658 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3659 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3661 // (C - x) - y -> C - (x + y)
3662 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3663 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3664 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3665 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3668 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3669 // rather than 'sub 0/1' (the sext should get folded).
3670 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3671 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3672 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3673 TLI.getBooleanContents(VT) ==
3674 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3675 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3676 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3679 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3680 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3681 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3682 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3683 SDValue S0 = N1.getOperand(0);
3684 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3685 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3686 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3687 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3691 // If the relocation model supports it, consider symbol offsets.
3692 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3693 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3694 // fold (sub Sym, c) -> Sym-c
3695 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3696 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3698 (uint64_t)N1C->getSExtValue());
3699 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3700 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3701 if (GA->getGlobal() == GB->getGlobal())
3702 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3706 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3707 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3708 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3709 if (TN->getVT() == MVT::i1) {
3710 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3711 DAG.getConstant(1, DL, VT));
3712 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3716 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3717 if (N1.getOpcode() == ISD::VSCALE) {
3718 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3719 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3722 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3723 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3724 APInt NewStep = -N1.getConstantOperandAPInt(0);
3725 return DAG.getNode(ISD::ADD, DL, VT, N0,
3726 DAG.getStepVector(DL, VT, NewStep));
3729 // Prefer an add for more folding potential and possibly better codegen:
3730 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3731 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3732 SDValue ShAmt = N1.getOperand(1);
3733 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3735 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3736 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3737 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3741 // As with the previous fold, prefer add for more folding potential.
3742 // Subtracting SMIN/0 is the same as adding SMIN/0:
3743 // N0 - (X << BW-1) --> N0 + (X << BW-1)
3744 if (N1.getOpcode() == ISD::SHL) {
3745 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3746 if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3747 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3750 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3751 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3752 if (SDValue Carry = getAsCarry(TLI, N0)) {
3754 SDValue Zero = DAG.getConstant(0, DL, VT);
3755 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3756 return DAG.getNode(ISD::ADDCARRY, DL,
3757 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3762 // If there's no chance of borrowing from adjacent bits, then sub is xor:
3763 // sub C0, X --> xor X, C0
3764 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
3765 if (!C0->isOpaque()) {
3766 const APInt &C0Val = C0->getAPIntValue();
3767 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
3768 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
3769 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
/// Combine a saturating-subtract node (handles both signed and unsigned
/// variants via N->getOpcode()): constant folding plus trivial identities.
/// NOTE(review): some interior lines (early returns/braces) are elided in
/// this view of the file.
3776 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3777 SDValue N0 = N->getOperand(0);
3778 SDValue N1 = N->getOperand(1);
3779 EVT VT = N0.getValueType();
// An undef operand may take any value, and 0 is always a legal result.
3782 // fold (sub_sat x, undef) -> 0
3783 if (N0.isUndef() || N1.isUndef())
3784 return DAG.getConstant(0, DL, VT);
3786 // fold (sub_sat x, x) -> 0
3788 return DAG.getConstant(0, DL, VT);
3790 // fold (sub_sat c1, c2) -> c3
3791 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3795 if (VT.isVector()) {
3796 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3799 // fold (sub_sat x, 0) -> x, vector edition
3800 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3804 // fold (sub_sat x, 0) -> x
3805 if (isNullConstant(N1))
/// Combine SUBC: a subtract that also produces a borrow flag as a glued
/// second result (MVT::Glue). All folds below re-emit CARRY_FALSE for the
/// flag since none of them can borrow.
3811 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3812 SDValue N0 = N->getOperand(0);
3813 SDValue N1 = N->getOperand(1);
3814 EVT VT = N0.getValueType();
3817 // If the flag result is dead, turn this into an SUB.
3818 if (!N->hasAnyUseOfValue(1))
3819 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3820 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3822 // fold (subc x, x) -> 0 + no borrow
3824 return CombineTo(N, DAG.getConstant(0, DL, VT),
3825 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3827 // fold (subc x, 0) -> x + no borrow
3828 if (isNullConstant(N1))
3829 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Subtracting anything from all-ones never borrows, and -1 - x == ~x.
3831 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3832 if (isAllOnesConstant(N0))
3833 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3834 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
/// Combine SSUBO/USUBO: subtract with an overflow flag as the second result.
3839 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3840 SDValue N0 = N->getOperand(0);
3841 SDValue N1 = N->getOperand(1);
3842 EVT VT = N0.getValueType();
3843 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3845 EVT CarryVT = N->getValueType(1);
3848 // If the flag result is dead, turn this into an SUB.
3849 if (!N->hasAnyUseOfValue(1))
3850 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3851 DAG.getUNDEF(CarryVT))
3853 // fold (subo x, x) -> 0 + no borrow
3855 return CombineTo(N, DAG.getConstant(0, DL, VT),
3856 DAG.getConstant(0, DL, CarryVT));
3858 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3860 // fold (subo x, c) -> (addo x, -c)
// Skip INT_MIN: its negation is not representable, so the addo form would
// not be equivalent.
3861 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3862 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3863 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3866 // fold (subo x, 0) -> x + no borrow
3867 if (isNullOrNullSplat(N1))
3868 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3870 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3871 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3872 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3873 DAG.getConstant(0, DL, CarryVT));
/// Combine SUBE: subtract with a glued borrow-in (third operand).
3878 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3879 SDValue N0 = N->getOperand(0);
3880 SDValue N1 = N->getOperand(1);
3881 SDValue CarryIn = N->getOperand(2);
// A known-false borrow-in makes the extended subtract a plain SUBC.
3883 // fold (sube x, y, false) -> (subc x, y)
3884 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3885 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
/// Combine SUBCARRY: subtract with a boolean carry-in operand and a
/// carry-out result.
3890 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3891 SDValue N0 = N->getOperand(0);
3892 SDValue N1 = N->getOperand(1);
3893 SDValue CarryIn = N->getOperand(2);
// With a zero carry-in this is exactly an unsigned subtract-with-overflow,
// but only form USUBO if it is legal (or we are pre-legalization).
3895 // fold (subcarry x, y, false) -> (usubo x, y)
3896 if (isNullConstant(CarryIn)) {
3897 if (!LegalOperations ||
3898 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3899 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
/// Combine SSUBO_CARRY: signed subtract with a carry-in and overflow-out.
/// Mirrors visitSUBCARRY for the signed opcode.
3905 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3906 SDValue N0 = N->getOperand(0);
3907 SDValue N1 = N->getOperand(1);
3908 SDValue CarryIn = N->getOperand(2);
3910 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3911 if (isNullConstant(CarryIn)) {
3912 if (!LegalOperations ||
3913 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3914 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3920 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// The third operand ("Scale") is the fixed-point scale and is carried
// through unchanged by every fold below.
3922 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3923 SDValue N0 = N->getOperand(0);
3924 SDValue N1 = N->getOperand(1);
3925 SDValue Scale = N->getOperand(2);
3926 EVT VT = N0.getValueType();
3928 // fold (mulfix x, undef, scale) -> 0
3929 if (N0.isUndef() || N1.isUndef())
3930 return DAG.getConstant(0, SDLoc(N), VT);
3932 // Canonicalize constant to RHS (vector doesn't have to splat)
3933 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3934 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3935 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3937 // fold (mulfix x, 0, scale) -> 0
3938 if (isNullConstant(N1))
3939 return DAG.getConstant(0, SDLoc(N), VT);
/// Combine MUL nodes: constant folding, canonicalization of constants to the
/// RHS, strength reduction of multiplies-by-constant into shifts/adds/subs,
/// vscale/step_vector folds, and replacement of 0/1-factor vectors with a
/// clearing AND mask.
3944 SDValue DAGCombiner::visitMUL(SDNode *N) {
3945 SDValue N0 = N->getOperand(0);
3946 SDValue N1 = N->getOperand(1);
3947 EVT VT = N0.getValueType();
3950 // fold (mul x, undef) -> 0
3951 if (N0.isUndef() || N1.isUndef())
3952 return DAG.getConstant(0, DL, VT);
3954 // fold (mul c1, c2) -> c1*c2
3955 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
3958 // canonicalize constant to RHS (vector doesn't have to splat)
3959 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3960 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3961 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
3963 bool N1IsConst = false;
3964 bool N1IsOpaqueConst = false;
3968 if (VT.isVector()) {
3969 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
// For vectors, a constant RHS means a splat; record its splat value.
3972 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3973 assert((!N1IsConst ||
3974 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3975 "Splat APInt should be element width");
3977 N1IsConst = isa<ConstantSDNode>(N1);
3979 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3980 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3984 // fold (mul x, 0) -> 0
3985 if (N1IsConst && ConstValue1.isZero())
3988 // fold (mul x, 1) -> x
3989 if (N1IsConst && ConstValue1.isOne())
3992 if (SDValue NewSel = foldBinOpIntoSelect(N))
3995 // fold (mul x, -1) -> 0-x
3996 if (N1IsConst && ConstValue1.isAllOnes())
3997 return DAG.getNode(ISD::SUB, DL, VT,
3998 DAG.getConstant(0, DL, VT), N0);
4000 // fold (mul x, (1 << c)) -> x << c
4001 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4002 DAG.isKnownToBeAPowerOfTwo(N1) &&
4003 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4004 SDValue LogBase2 = BuildLogBase2(N1, DL);
4005 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4006 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4007 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4010 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4011 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4012 unsigned Log2Val = (-ConstValue1).logBase2();
4013 // FIXME: If the input is something that is easily negated (e.g. a
4014 // single-use add), we should put the negate there.
4015 return DAG.getNode(ISD::SUB, DL, VT,
4016 DAG.getConstant(0, DL, VT),
4017 DAG.getNode(ISD::SHL, DL, VT, N0,
4018 DAG.getConstant(Log2Val, DL,
4019 getShiftAmountTy(N0.getValueType()))));
4022 // Try to transform:
4023 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4024 // mul x, (2^N + 1) --> add (shl x, N), x
4025 // mul x, (2^N - 1) --> sub (shl x, N), x
4026 // Examples: x * 33 --> (x << 5) + x
4027 // x * 15 --> (x << 4) - x
4028 // x * -33 --> -((x << 5) + x)
4029 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4030 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4031 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4032 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4033 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4034 // x * 0xf800 --> (x << 16) - (x << 11)
4035 // x * -0x8800 --> -((x << 15) + (x << 11))
4036 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4037 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4038 // TODO: We could handle more general decomposition of any constant by
4039 // having the target set a limit on number of ops and making a
4040 // callback to determine that sequence (similar to sqrt expansion).
4041 unsigned MathOp = ISD::DELETED_NODE;
// Work on |C|; the sign is re-applied at the end with a final negate.
4042 APInt MulC = ConstValue1.abs();
4043 // The constant `2` should be treated as (2^0 + 1).
4044 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4045 MulC.lshrInPlace(TZeros);
4046 if ((MulC - 1).isPowerOf2())
4048 else if ((MulC + 1).isPowerOf2())
4051 if (MathOp != ISD::DELETED_NODE) {
4053 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4055 assert(ShAmt < VT.getScalarSizeInBits() &&
4056 "multiply-by-constant generated out of bounds shift");
4058 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4060 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4061 DAG.getNode(ISD::SHL, DL, VT, N0,
4062 DAG.getConstant(TZeros, DL, VT)))
4063 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4064 if (ConstValue1.isNegative())
4065 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
4070 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4071 if (N0.getOpcode() == ISD::SHL) {
4072 SDValue N01 = N0.getOperand(1);
4073 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4074 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4077 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4082 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4083 if (N0.getOpcode() == ISD::SHL &&
4084 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4086 } else if (N1.getOpcode() == ISD::SHL &&
4087 isConstantOrConstantVector(N1.getOperand(1)) &&
4093 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4094 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4098 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4099 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4100 N0.getOpcode() == ISD::ADD &&
4101 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4102 isMulAddWithConstProfitable(N, N0, N1))
4105 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4106 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4108 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4109 if (N0.getOpcode() == ISD::VSCALE)
4110 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4111 const APInt &C0 = N0.getConstantOperandAPInt(0);
4112 const APInt &C1 = NC1->getAPIntValue();
4113 return DAG.getVScale(DL, VT, C0 * C1);
4116 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4118 if (N0.getOpcode() == ISD::STEP_VECTOR)
4119 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4120 const APInt &C0 = N0.getConstantOperandAPInt(0);
4121 APInt NewStep = C0 * MulVal;
4122 return DAG.getStepVector(DL, VT, NewStep);
4125 // Fold ((mul x, 0/undef) -> 0,
4126 // (mul x, 1) -> x
4128 // We can replace vectors with '0' and '1' factors with a clearing mask.
4129 if (VT.isFixedLengthVector()) {
4130 unsigned NumElts = VT.getVectorNumElements();
4131 SmallBitVector ClearMask;
4132 ClearMask.reserve(NumElts);
// Per-element predicate: 0/undef factors get cleared (mask bit set);
// other constant factors keep the original lane.
4133 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4134 if (!V || V->isZero()) {
4135 ClearMask.push_back(true);
4138 ClearMask.push_back(false);
4141 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4142 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4143 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4144 EVT LegalSVT = N1.getOperand(0).getValueType();
4145 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4146 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4147 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4148 for (unsigned I = 0; I != NumElts; ++I)
4151 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4156 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4159 // Simplify the operands using demanded-bits information.
4160 if (SimplifyDemandedBits(SDValue(N, 0)))
4161 return SDValue(N, 0);
4166 /// Return true if divmod libcall is available.
// Only simple scalar integer types (i8..i128) can map to a runtime
// SDIVREM/UDIVREM libcall; vectors and non-simple types return false.
4167 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4168 const TargetLowering &TLI) {
4170 EVT NodeType = Node->getValueType(0);
4171 if (!NodeType.isSimple())
4173 switch (NodeType.getSimpleVT().SimpleTy) {
4174 default: return false; // No libcall for vector types.
4175 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4176 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4177 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4178 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4179 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
// A null name means the target provides no such libcall.
4182 return TLI.getLibcallName(LC) != nullptr;
4185 /// Issue divrem if both quotient and remainder are needed.
// Given a DIV or REM node, look for a sibling node computing the other half
// of the same division and merge the pair into a single SDIVREM/UDIVREM.
4186 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4187 if (Node->use_empty())
4188 return SDValue(); // This is a dead node, leave it alone.
4190 unsigned Opcode = Node->getOpcode();
4191 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4192 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4194 // DivMod lib calls can still work on non-legal types if using lib-calls.
4195 EVT VT = Node->getValueType(0);
4196 if (VT.isVector() || !VT.isInteger())
4199 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4202 // If DIVREM is going to get expanded into a libcall,
4203 // but there is no libcall available, then don't combine.
4204 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4205 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4208 // If div is legal, it's better to do the normal expansion
4209 unsigned OtherOpcode = 0;
4210 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4211 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4212 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4215 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4216 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4220 SDValue Op0 = Node->getOperand(0);
4221 SDValue Op1 = Node->getOperand(1);
// Scan the dividend's other users for a matching div/rem over the same
// operands that can share the combined node.
4223 for (SDNode *User : Op0->uses()) {
4224 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4227 // Convert the other matching node(s), too;
4228 // otherwise, the DIVREM may get target-legalized into something
4229 // target-specific that we won't be able to recognize.
4230 unsigned UserOpc = User->getOpcode();
4231 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4232 User->getOperand(0) == Op0 &&
4233 User->getOperand(1) == Op1) {
4235 if (UserOpc == OtherOpcode) {
4236 SDVTList VTs = DAG.getVTList(VT, VT);
4237 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4238 } else if (UserOpc == DivRemOpc) {
4239 combined = SDValue(User, 0);
4241 assert(UserOpc == Opcode);
// Result 0 of the DIVREM is the quotient, result 1 the remainder.
4245 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4246 CombineTo(User, combined);
4247 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4248 CombineTo(User, combined.getValue(1));
// Fold div/rem nodes with trivial operands: undef operands, zero dividend,
// one (or boolean) divisor. Shared by the SDIV/UDIV/SREM/UREM visitors.
4254 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4255 SDValue N0 = N->getOperand(0);
4256 SDValue N1 = N->getOperand(1);
4257 EVT VT = N->getValueType(0);
4260 unsigned Opc = N->getOpcode();
4261 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4262 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4264 // X / undef -> undef
4265 // X % undef -> undef
4268 // NOTE: This includes vectors where any divisor element is zero/undef.
4269 if (DAG.isUndef(Opc, {N0, N1}))
4270 return DAG.getUNDEF(VT);
4275 return DAG.getConstant(0, DL, VT);
4279 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4280 if (N0C && N0C->isZero())
4286 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4290 // If this is a boolean op (single-bit element type), we can't have
4291 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4292 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// x / 1 == x and x % 1 == 0.
4294 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4295 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
/// Combine SDIV nodes: constant folding, special divisors (-1, INT_MIN),
/// strength reduction to UDIV when both operands are known non-negative,
/// and pairing with a matching SREM into SDIVREM.
4300 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4301 SDValue N0 = N->getOperand(0);
4302 SDValue N1 = N->getOperand(1);
4303 EVT VT = N->getValueType(0);
4304 EVT CCVT = getSetCCResultType(VT);
4307 // fold (sdiv c1, c2) -> c1/c2
4308 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4313 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4316 // fold (sdiv X, -1) -> 0-X
4317 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4318 if (N1C && N1C->isAllOnes())
4319 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// Only X == MIN_SIGNED divides to 1; every other value divides to 0.
4321 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4322 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4323 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4324 DAG.getConstant(1, DL, VT),
4325 DAG.getConstant(0, DL, VT));
4327 if (SDValue V = simplifyDivRem(N, DAG))
4330 if (SDValue NewSel = foldBinOpIntoSelect(N))
4333 // If we know the sign bits of both operands are zero, strength reduce to a
4334 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4335 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4336 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4338 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4339 // If the corresponding remainder node exists, update its users with
4340 // (Dividend - (Quotient * Divisor).
4341 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4343 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4344 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4345 AddToWorklist(Mul.getNode());
4346 AddToWorklist(Sub.getNode());
4347 CombineTo(RemNode, Sub);
4352 // sdiv, srem -> sdivrem
4353 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4354 // true. Otherwise, we break the simplification logic in visitREM().
4355 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4356 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4357 if (SDValue DivRem = useDivRem(N))
/// Return true if Divisor is a non-zero, non-opaque constant (scalar or
/// vector of such elements) whose magnitude is a power of two, allowing
/// both +2^k and -2^k.
4363 static bool isDivisorPowerOfTwo(SDValue Divisor) {
4364 // Helper for determining whether a value is a power-2 constant scalar or a
4365 // vector of such elements.
4366 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4367 if (C->isZero() || C->isOpaque())
4369 if (C->getAPIntValue().isPowerOf2())
4371 if (C->getAPIntValue().isNegatedPowerOf2())
4376 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
/// Build an optimized replacement for (sdiv N0, N1) without creating new
/// SDIV nodes: a shift/add sequence for power-of-two divisors (or a
/// target-provided BuildSDIVPow2), else the generic BuildSDIV expansion
/// for other constant divisors.
4379 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4381 EVT VT = N->getValueType(0);
4382 EVT CCVT = getSetCCResultType(VT);
4383 unsigned BitWidth = VT.getScalarSizeInBits();
4385 // fold (sdiv X, pow2) -> simple ops after legalize
4386 // FIXME: We check for the exact bit here because the generic lowering gives
4387 // better results in that case. The target-specific lowering should learn how
4388 // to handle exact sdivs efficiently.
4389 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4390 // Target-specific implementation of sdiv x, pow2.
4391 if (SDValue Res = BuildSDIVPow2(N))
4394 // Create constants that are functions of the shift amount value.
4395 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4396 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
// CTTZ of a power-of-two divisor gives log2(|divisor|) == the shift amount.
4397 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4398 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4399 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4400 if (!isConstantOrConstantVector(Inexact))
4403 // Splat the sign bit into the register
4404 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4405 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4406 AddToWorklist(Sign.getNode());
// Rounding adjustment so negative dividends round toward zero.
4408 // Add (N0 < 0) ? abs2 - 1 : 0;
4409 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4410 AddToWorklist(Srl.getNode());
4411 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4412 AddToWorklist(Add.getNode());
4413 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4414 AddToWorklist(Sra.getNode());
4416 // Special case: (sdiv X, 1) -> X
4417 // Special Case: (sdiv X, -1) -> 0-X
4418 SDValue One = DAG.getConstant(1, DL, VT);
4419 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4420 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4421 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4422 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4423 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4425 // If dividing by a positive value, we're done. Otherwise, the result must
4427 SDValue Zero = DAG.getConstant(0, DL, VT);
4428 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4430 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4431 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4432 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4436 // If integer divide is expensive and we satisfy the requirements, emit an
4437 // alternate sequence. Targets may check function attributes for size/speed
4439 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4440 if (isConstantOrConstantVector(N1) &&
4441 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4442 if (SDValue Op = BuildSDIV(N))
/// Combine UDIV nodes: constant folding, the all-ones divisor special case,
/// and pairing with a matching UREM into UDIVREM.
4448 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4449 SDValue N0 = N->getOperand(0);
4450 SDValue N1 = N->getOperand(1);
4451 EVT VT = N->getValueType(0);
4452 EVT CCVT = getSetCCResultType(VT);
4455 // fold (udiv c1, c2) -> c1/c2
4456 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4461 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
// Unsigned division by the maximum value yields 1 only when X equals it.
4464 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4465 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4466 if (N1C && N1C->isAllOnes())
4467 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4468 DAG.getConstant(1, DL, VT),
4469 DAG.getConstant(0, DL, VT));
4471 if (SDValue V = simplifyDivRem(N, DAG))
4474 if (SDValue NewSel = foldBinOpIntoSelect(N))
4477 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4478 // If the corresponding remainder node exists, update its users with
4479 // (Dividend - (Quotient * Divisor).
4480 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4482 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4483 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4484 AddToWorklist(Mul.getNode());
4485 AddToWorklist(Sub.getNode());
4486 CombineTo(RemNode, Sub);
4491 // udiv, urem -> udivrem
4492 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4493 // true. Otherwise, we break the simplification logic in visitREM().
4494 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4495 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4496 if (SDValue DivRem = useDivRem(N))
/// Build an optimized replacement for (udiv N0, N1): shift for power-of-two
/// divisors, shift-by-sum for shifted-power-of-two divisors, else the
/// generic BuildUDIV expansion for constant divisors.
4502 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4504 EVT VT = N->getValueType(0);
4506 // fold (udiv x, (1 << c)) -> x >>u c
4507 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4508 DAG.isKnownToBeAPowerOfTwo(N1)) {
4509 SDValue LogBase2 = BuildLogBase2(N1, DL);
4510 AddToWorklist(LogBase2.getNode());
4512 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4513 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4514 AddToWorklist(Trunc.getNode());
4515 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4518 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4519 if (N1.getOpcode() == ISD::SHL) {
4520 SDValue N10 = N1.getOperand(0);
4521 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4522 DAG.isKnownToBeAPowerOfTwo(N10)) {
4523 SDValue LogBase2 = BuildLogBase2(N10, DL);
4524 AddToWorklist(LogBase2.getNode());
// The shift amount is computed in the type of the SHL's amount operand.
4526 EVT ADDVT = N1.getOperand(1).getValueType();
4527 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4528 AddToWorklist(Trunc.getNode());
4529 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4530 AddToWorklist(Add.getNode());
4531 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4535 // fold (udiv x, c) -> alternate
4536 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4537 if (isConstantOrConstantVector(N1) &&
4538 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4539 if (SDValue Op = BuildUDIV(N))
// Try a target-specific lowering for srem-by-power-of-2, but only when no
// matching SDIV node exists (otherwise visitREM pairs the two instead) and
// the exact flag is absent.
4545 SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4546 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4547 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4548 // Target-specific implementation of srem x, pow2.
4549 if (SDValue Res = BuildSREMPow2(N))
4555 // handles ISD::SREM and ISD::UREM
// Combines remainder nodes: constant folding, power-of-two masks, the
// X - (X/C)*C rewrite via the div-like helpers, and DIVREM pairing.
4556 SDValue DAGCombiner::visitREM(SDNode *N) {
4557 unsigned Opcode = N->getOpcode();
4558 SDValue N0 = N->getOperand(0);
4559 SDValue N1 = N->getOperand(1);
4560 EVT VT = N->getValueType(0);
4561 EVT CCVT = getSetCCResultType(VT);
4563 bool isSigned = (Opcode == ISD::SREM);
4566 // fold (rem c1, c2) -> c1%c2
4567 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4570 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4571 // Freeze the numerator to avoid a miscompile with an undefined value.
4572 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
4573 SDValue F0 = DAG.getFreeze(N0);
4574 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4575 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4578 if (SDValue V = simplifyDivRem(N, DAG))
4581 if (SDValue NewSel = foldBinOpIntoSelect(N))
4585 // If we know the sign bits of both operands are zero, strength reduce to a
4586 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4587 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4588 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4590 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4591 // fold (urem x, pow2) -> (and x, pow2-1)
4592 SDValue NegOne = DAG.getAllOnesConstant(DL, VT)
4593 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4594 AddToWorklist(Add.getNode());
4595 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4597 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4598 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4599 // TODO: We should sink the following into isKnownToBePowerOfTwo
4600 // using a OrZero parameter analogous to our handling in ValueTracking.
4601 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4602 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4603 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4604 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4605 AddToWorklist(Add.getNode());
4606 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4610 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4612 // If X/C can be simplified by the division-by-constant logic, lower
4613 // X%C to the equivalent of X-X/C*C.
4614 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4615 // speculative DIV must not cause a DIVREM conversion. We guard against this
4616 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4617 // combine will not return a DIVREM. Regardless, checking cheapness here
4618 // makes sense since the simplification results in fatter code.
4619 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4621 // check if we can build faster implementation for srem
4622 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4623 return OptimizedRem;
4626 SDValue OptimizedDiv =
4627 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4628 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4629 // If the equivalent Div node also exists, update its users.
4630 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4631 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4633 CombineTo(DivNode, OptimizedDiv);
4634 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4635 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4636 AddToWorklist(OptimizedDiv.getNode());
4637 AddToWorklist(Mul.getNode());
4642 // sdiv, srem -> sdivrem
4643 if (SDValue DivRem = useDivRem(N))
4644 return DivRem.getValue(1);
/// Combine MULHS (signed multiply-high) nodes: constant folding,
/// canonicalization, trivial operands, and widening to a full MUL when a
/// double-width multiply is legal.
4649 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4650 SDValue N0 = N->getOperand(0);
4651 SDValue N1 = N->getOperand(1);
4652 EVT VT = N->getValueType(0);
4655 // fold (mulhs c1, c2)
4656 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4659 // canonicalize constant to RHS.
4660 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4661 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4662 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4664 if (VT.isVector()) {
4665 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4668 // fold (mulhs x, 0) -> 0
4669 // do not return N1, because undef node may exist.
4670 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4671 return DAG.getConstant(0, DL, VT);
4674 // fold (mulhs x, 0) -> 0
4675 if (isNullConstant(N1))
// The high half of x*1 is just the sign extension of x.
4678 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4679 if (isOneConstant(N1))
4680 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4681 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4682 getShiftAmountTy(N0.getValueType())));
4684 // fold (mulhs x, undef) -> 0
4685 if (N0.isUndef() || N1.isUndef())
4686 return DAG.getConstant(0, DL, VT);
4688 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4690 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4692 MVT Simple = VT.getSimpleVT();
4693 unsigned SimpleSize = Simple.getSizeInBits();
4694 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4695 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Sign-extend, multiply in double width, then take the top half.
4696 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4697 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4698 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4699 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4700 DAG.getConstant(SimpleSize, DL,
4701 getShiftAmountTy(N1.getValueType())));
4702 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
/// Combine MULHU (unsigned multiply-high) nodes: constant folding,
/// canonicalization, trivial operands, power-of-two shift rewrite, and
/// widening to a full MUL when a double-width multiply is legal.
4709 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4710 SDValue N0 = N->getOperand(0);
4711 SDValue N1 = N->getOperand(1);
4712 EVT VT = N->getValueType(0);
4715 // fold (mulhu c1, c2)
4716 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4719 // canonicalize constant to RHS.
4720 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4721 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4722 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4724 if (VT.isVector()) {
4725 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4728 // fold (mulhu x, 0) -> 0
4729 // do not return N1, because undef node may exist.
4730 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4731 return DAG.getConstant(0, DL, VT);
4734 // fold (mulhu x, 0) -> 0
4735 if (isNullConstant(N1))
// The unsigned high half of x*1 is always zero.
4738 // fold (mulhu x, 1) -> 0
4739 if (isOneConstant(N1))
4740 return DAG.getConstant(0, DL, N0.getValueType());
4742 // fold (mulhu x, undef) -> 0
4743 if (N0.isUndef() || N1.isUndef())
4744 return DAG.getConstant(0, DL, VT);
// High bits of x << c are the top (bitwidth - c) bits of x.
4746 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4747 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4748 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4749 unsigned NumEltBits = VT.getScalarSizeInBits();
4750 SDValue LogBase2 = BuildLogBase2(N1, DL);
4751 SDValue SRLAmt = DAG.getNode(
4752 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4753 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4754 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4755 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4758 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4760 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4762 MVT Simple = VT.getSimpleVT();
4763 unsigned SimpleSize = Simple.getSizeInBits();
4764 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4765 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Zero-extend, multiply in double width, then take the top half.
4766 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4767 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4768 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4769 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4770 DAG.getConstant(SimpleSize, DL,
4771 getShiftAmountTy(N1.getValueType())));
4772 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4776 // Simplify the operands using demanded-bits information.
4777 // We don't have demanded bits support for MULHU so this just enables constant
4778 // folding based on known bits.
4779 if (SimplifyDemandedBits(SDValue(N, 0)))
4780 return SDValue(N, 0);
// Combine the averaging nodes (AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU —
// the exact opcode is read from the node): constant-fold, canonicalize
// constants to the RHS, and fold a zero splat RHS of the floor variants
// into a single arithmetic/logical shift right by one.
// NOTE(review): original line numbers jump (e.g. 4810->4814) — the undef
// fold's body and the function epilogue are elided in this excerpt.
4785 SDValue DAGCombiner::visitAVG(SDNode *N) {
4786 unsigned Opcode = N->getOpcode();
4787 SDValue N0 = N->getOperand(0);
4788 SDValue N1 = N->getOperand(1);
4789 EVT VT = N->getValueType(0);
4792 // fold (avg c1, c2)
4793 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4796 // canonicalize constant to RHS.
4797 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4798 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4799 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4801 if (VT.isVector()) {
4802 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4805 // fold (avgfloor x, 0) -> x >> 1
4806 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
// avgfloor(x, 0) == (x + 0) >> 1; signed uses SRA, unsigned uses SRL.
4807 if (Opcode == ISD::AVGFLOORS)
4808 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
4809 if (Opcode == ISD::AVGFLOORU)
4810 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
4814 // fold (avg x, undef) -> x
4820 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
4825 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4826 /// give the opcodes for the two computations that are being performed. Return
4827 /// true if a simplification was made.
// Shared helper for two-result nodes (e.g. SMUL_LOHI/UMUL_LOHI): if only one
// of the two results is used, replace the node with the single-result opcode
// (LoOp or HiOp); otherwise try to simplify each half independently via the
// combiner and keep whichever simplification succeeds.
// NOTE(review): the parameter list is cut mid-signature in this excerpt (the
// HiOp parameter line is elided), and several returns/braces are missing
// where the embedded line numbers jump; code kept byte-identical.
4828 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4830 // If the high half is not needed, just compute the low half.
4831 bool HiExists = N->hasAnyUseOfValue(1);
4832 if (!HiExists && (!LegalOperations ||
4833 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
// CombineTo replaces both results with Res so the dead high half goes away.
4834 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4835 return CombineTo(N, Res, Res);
4838 // If the low half is not needed, just compute the high half.
4839 bool LoExists = N->hasAnyUseOfValue(0);
4840 if (!LoExists && (!LegalOperations ||
4841 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4842 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4843 return CombineTo(N, Res, Res);
4846 // If both halves are used, return as it is.
4847 if (LoExists && HiExists)
4850 // If the two computed results can be simplified separately, separate them.
4852 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4853 AddToWorklist(Lo.getNode());
4854 SDValue LoOpt = combine(Lo.getNode());
// Only accept the combined form if it is a genuinely different node and is
// legal (or we are pre-legalization).
4855 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4856 (!LegalOperations ||
4857 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4858 return CombineTo(N, LoOpt, LoOpt);
4862 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4863 AddToWorklist(Hi.getNode());
4864 SDValue HiOpt = combine(Hi.getNode());
4865 if (HiOpt.getNode() && HiOpt != Hi &&
4866 (!LegalOperations ||
4867 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4868 return CombineTo(N, HiOpt, HiOpt);
// Combine ISD::SMUL_LOHI (signed multiply producing low+high halves):
// first defer to SimplifyNodeWithTwoResults(MUL, MULHS), then canonicalize
// constants to the RHS, and finally expand through a double-width signed
// multiply when the 2x-wide MUL is legal.
// NOTE(review): line numbers jump (4875->4878, 4905->4912) — the early
// return and epilogue are elided in this excerpt; code kept byte-identical.
4874 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4875 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4878 SDValue N0 = N->getOperand(0);
4879 SDValue N1 = N->getOperand(1);
4880 EVT VT = N->getValueType(0);
4883 // canonicalize constant to RHS (vector doesn't have to splat)
4884 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4885 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4886 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
4888 // If the type is twice as wide is legal, transform the mulhu to a wider
4889 // multiply plus a shift.
4890 if (VT.isSimple() && !VT.isVector()) {
4891 MVT Simple = VT.getSimpleVT();
4892 unsigned SimpleSize = Simple.getSizeInBits();
4893 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4894 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Sign-extend both operands, multiply in the wide type; the high result
// is the wide product shifted down by the original width, the low result
// is a plain truncate of the wide product.
4895 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4896 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4897 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4898 // Compute the high part as N1.
4899 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4900 DAG.getConstant(SimpleSize, DL,
4901 getShiftAmountTy(Lo.getValueType())));
4902 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4903 // Compute the low part as N0.
4904 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4905 return CombineTo(N, Lo, Hi);
// Combine ISD::UMUL_LOHI (unsigned multiply producing low+high halves):
// defer to SimplifyNodeWithTwoResults(MUL, MULHU), canonicalize constants
// to the RHS, fold multiplication by 0 and 1, and expand through a
// double-width unsigned multiply when the 2x-wide MUL is legal.
// NOTE(review): line numbers jump (4913->4916, 4955->4962) — early return
// and epilogue are elided in this excerpt; code kept byte-identical.
4912 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4913 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4916 SDValue N0 = N->getOperand(0);
4917 SDValue N1 = N->getOperand(1);
4918 EVT VT = N->getValueType(0);
4921 // canonicalize constant to RHS (vector doesn't have to splat)
4922 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4923 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4924 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
4926 // (umul_lohi N0, 0) -> (0, 0)
4927 if (isNullConstant(N1)) {
4928 SDValue Zero = DAG.getConstant(0, DL, VT);
4929 return CombineTo(N, Zero, Zero);
4932 // (umul_lohi N0, 1) -> (N0, 0)
4933 if (isOneConstant(N1)) {
4934 SDValue Zero = DAG.getConstant(0, DL, VT);
4935 return CombineTo(N, N0, Zero);
4938 // If the type is twice as wide is legal, transform the mulhu to a wider
4939 // multiply plus a shift.
4940 if (VT.isSimple() && !VT.isVector()) {
4941 MVT Simple = VT.getSimpleVT();
4942 unsigned SimpleSize = Simple.getSizeInBits();
4943 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4944 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Same expansion as SMUL_LOHI but with zero-extension for the unsigned
// semantics: wide product >> width is the high half, truncate is the low.
4945 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4946 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4947 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4948 // Compute the high part as N1.
4949 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4950 DAG.getConstant(SimpleSize, DL,
4951 getShiftAmountTy(Lo.getValueType())));
4952 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4953 // Compute the low part as N0.
4954 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4955 return CombineTo(N, Lo, Hi);
// Combine ISD::SMULO / ISD::UMULO (multiply with overflow flag): constant
// fold both results, canonicalize constants to the RHS, fold x*0, turn x*2
// into an overflow-checked add, special-case 1-bit SMULO as AND, and use
// sign-bit / known-bits analysis to prove the overflow flag is always 0.
// NOTE(review): line numbers jump in several places (e.g. 4976->4980,
// 5015->5017, 5023->5025) — guard conditions and declarations are elided
// in this excerpt; code kept byte-identical.
4962 SDValue DAGCombiner::visitMULO(SDNode *N) {
4963 SDValue N0 = N->getOperand(0);
4964 SDValue N1 = N->getOperand(1);
4965 EVT VT = N0.getValueType();
4966 bool IsSigned = (ISD::SMULO == N->getOpcode());
4968 EVT CarryVT = N->getValueType(1);
4971 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4972 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4974 // fold operation with constant operands.
4975 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4976 // multiple results.
// smul_ov/umul_ov compute the product and set Overflow as a side effect.
4980 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4981 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4982 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4983 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4986 // canonicalize constant to RHS.
4987 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4988 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4989 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4991 // fold (mulo x, 0) -> 0 + no carry out
4992 if (isNullOrNullSplat(N1))
4993 return CombineTo(N, DAG.getConstant(0, DL, VT),
4994 DAG.getConstant(0, DL, CarryVT));
4996 // (mulo x, 2) -> (addo x, x)
4997 // FIXME: This needs a freeze.
// The >2-bit restriction avoids the signed 2-bit case where x*2 and x+x
// disagree on overflow semantics.
4998 if (N1C && N1C->getAPIntValue() == 2 &&
4999 (!IsSigned || VT.getScalarSizeInBits() > 2))
5000 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5001 N->getVTList(), N0, N0);
5004 // A 1 bit SMULO overflows if both inputs are 1.
5005 if (VT.getScalarSizeInBits() == 1) {
5006 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5007 return CombineTo(N, And,
5008 DAG.getSetCC(DL, CarryVT, And,
5009 DAG.getConstant(0, DL, VT), ISD::SETNE));
5012 // Multiplying n * m significant bits yields a result of n + m significant
5013 // bits. If the total number of significant bits does not exceed the
5014 // result bit width (minus 1), there is no overflow.
5015 unsigned SignBits = DAG.ComputeNumSignBits(N0);
5017 SignBits += DAG.ComputeNumSignBits(N1);
5018 if (SignBits > VT.getScalarSizeInBits() + 1)
5019 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5020 DAG.getConstant(0, DL, CarryVT));
5022 KnownBits N1Known = DAG.computeKnownBits(N1);
5023 KnownBits N0Known = DAG.computeKnownBits(N0);
// Unsigned case: if the max possible operand values cannot overflow,
// replace with a plain MUL and a constant-0 carry.
5025 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
5027 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5028 DAG.getConstant(0, DL, CarryVT));
5034 // Function to calculate whether the Min/Max pair of SDNodes (potentially
5035 // swapped around) make a signed saturate pattern, clamping to between a signed
5036 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5037 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
5038 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5039 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
// Match a clamp made of two nested signed min/max operations (as SMIN/SMAX
// nodes, select_cc, or select-of-setcc). On success, sets BW to the clamp's
// effective bit width and returns the value being clamped; the operand
// convention follows SimplifySelectCC: N0<N1 ? N2 : N3.
// NOTE(review): line numbers jump throughout (5041->5043, 5058->5061,
// 5109->5114, ...) — early-return guards and the Unsigned flag handling
// are elided in this excerpt; code kept byte-identical.
5040 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5041 SDValue N3, ISD::CondCode CC, unsigned &BW,
// Recognizes one select/compare layer as an SMIN/SMAX-equivalent; returns
// the min/max opcode or 0 when the pattern does not match.
5043 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5045 // The compare and select operand should be the same or the select operands
5046 // should be truncated versions of the comparison.
5047 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5049 // The constants need to be the same or a truncated version of each other.
5050 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5051 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5054 const APInt &C1 = N1C->getAPIntValue();
5055 const APInt &C2 = N3C->getAPIntValue();
5056 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5058 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5061 // Check the initial value is a SMIN/SMAX equivalent.
5062 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5066 SDValue N00, N01, N02, N03;
// Decompose the inner node (SMIN/SMAX, SELECT_CC, or SELECT of SETCC) into
// comparison operands + select operands + condition code.
5068 switch (N0.getOpcode()) {
5071 N00 = N02 = N0.getOperand(0);
5072 N01 = N03 = N0.getOperand(1);
5073 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5075 case ISD::SELECT_CC:
5076 N00 = N0.getOperand(0);
5077 N01 = N0.getOperand(1);
5078 N02 = N0.getOperand(2);
5079 N03 = N0.getOperand(3);
5080 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5084 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5086 N00 = N0.getOperand(0).getOperand(0);
5087 N01 = N0.getOperand(0).getOperand(1);
5088 N02 = N0.getOperand(1);
5089 N03 = N0.getOperand(2);
5090 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
// The inner layer must also be a min/max, and of the opposite kind
// (one SMIN + one SMAX make a clamp).
5096 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5097 if (!Opcode1 || Opcode0 == Opcode1)
5100 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5101 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5102 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5105 const APInt &MinC = MinCOp->getAPIntValue();
5106 const APInt &MaxC = MaxCOp->getAPIntValue();
5107 APInt MinCPlus1 = MinC + 1;
// Signed saturate: clamp to [-2^(BW-1), 2^(BW-1)-1].
5108 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5109 BW = MinCPlus1.exactLogBase2() + 1;
// Unsigned-style saturate: clamp to [-(2^BW), 0] per the header comment —
// TODO confirm against the elided lines (the Unsigned out-flag is not
// visible in this excerpt).
5114 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5115 BW = MinCPlus1.exactLogBase2();
// If a min/max clamp (per isSaturatingMinMax) wraps an FP_TO_SINT, rewrite
// the whole pattern as a single FP_TO_SINT_SAT / FP_TO_UINT_SAT node when
// the target opts in via shouldConvertFpToSat, then extend/truncate the
// saturated result back to the select's type.
// NOTE(review): declarations of BW/Unsigned and some guards are elided
// where the embedded line numbers jump; code kept byte-identical.
5123 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5124 SDValue N3, ISD::CondCode CC,
5125 SelectionDAG &DAG) {
5128 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
5129 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5131 EVT FPVT = Fp.getOperand(0).getValueType();
5132 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5133 if (FPVT.isVector())
5134 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5135 FPVT.getVectorElementCount());
5136 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5137 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
// The FP_TO_*_SAT node carries the saturation width as a ValueType operand.
5140 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5141 DAG.getValueType(NewVT.getScalarType()));
5142 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
5143 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
// Match UMIN(FPTOUI(X), 2^n - 1) — possibly expressed as a select/vselect/
// select_cc with SETULT — and rewrite it as a single FP_TO_UINT_SAT node of
// width n when the target opts in via shouldConvertFpToSat.
// NOTE(review): the first guard's opening condition and a couple of early
// returns are elided where the line numbers jump; code kept byte-identical.
5146 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5147 SDValue N3, ISD::CondCode CC,
5148 SelectionDAG &DAG) {
5149 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5150 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5151 // be truncated versions of the the setcc (N0/N1).
5153 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5154 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5156 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5157 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5160 const APInt &C1 = N1C->getAPIntValue();
5161 const APInt &C3 = N3C->getAPIntValue();
// Both constants must be the same 2^n - 1 value (modulo zero-extension).
5162 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5163 C1 != C3.zext(C1.getBitWidth()))
5166 unsigned BW = (C1 + 1).exactLogBase2();
5167 EVT FPVT = N0.getOperand(0).getValueType();
5168 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5169 if (FPVT.isVector())
5170 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5171 FPVT.getVectorElementCount());
5172 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5177 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5178 DAG.getValueType(NewVT.getScalarType()));
5179 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
// Combine the integer min/max nodes (SMIN/SMAX/UMIN/UMAX): constant-fold,
// canonicalize constants to the RHS, flip signed<->unsigned when the sign
// bits are known zero and only the flipped opcode is legal, try the
// fp-to-sat clamp rewrites, and finish with demanded-bits simplification.
// NOTE(review): several returns/braces are elided where the embedded line
// numbers jump (e.g. 5190->5193, 5200->5204); code kept byte-identical.
5182 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5183 SDValue N0 = N->getOperand(0);
5184 SDValue N1 = N->getOperand(1);
5185 EVT VT = N0.getValueType();
5186 unsigned Opcode = N->getOpcode();
5189 // fold operation with constant operands.
5190 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5193 // If the operands are the same, this is a no-op.
5197 // canonicalize constant to RHS
5198 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5199 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5200 return DAG.getNode(Opcode, DL, VT, N1, N0);
5204 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5207 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5208 // Only do this if the current op isn't legal and the flipped is.
5209 if (!TLI.isOperationLegal(Opcode, VT) &&
5210 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5211 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
// With sign bits known clear, signed and unsigned comparisons agree, so
// the signed/unsigned variants are interchangeable.
5214 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5215 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5216 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5217 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5218 default: llvm_unreachable("Unknown MINMAX opcode");
5220 if (TLI.isOperationLegal(AltOpcode, VT))
5221 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5224 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5225 if (SDValue S = PerformMinMaxFpToSatCombine(
5226 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5228 if (Opcode == ISD::UMIN)
5229 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5232 // Simplify the operands using demanded-bits information.
5233 if (SimplifyDemandedBits(SDValue(N, 0)))
5234 return SDValue(N, 0);
5239 /// If this is a bitwise logic instruction and both operands have the same
5240 /// opcode, try to sink the other opcode after the logic instruction.
// For a bitwise logic op (AND/OR/XOR) whose two operands share the same
// opcode ("hand op"), sink that shared opcode below the logic op:
// logic(hand(x), hand(y)) -> hand(logic(x, y)). Handles extends, truncate,
// shifts/and with a common second operand, bswap, bitcast /
// scalar_to_vector, and same-mask vector shuffles.
// NOTE(review): some returns/guards are elided where the embedded line
// numbers jump (e.g. 5251->5254, 5396->5403); code kept byte-identical.
5241 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5242 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5243 EVT VT = N0.getValueType();
5244 unsigned LogicOpcode = N->getOpcode();
5245 unsigned HandOpcode = N0.getOpcode();
5246 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5247 LogicOpcode == ISD::XOR) && "Expected logic opcode");
5248 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5250 // Bail early if none of these transforms apply.
5251 if (N0.getNumOperands() == 0)
5254 // FIXME: We should check number of uses of the operands to not increase
5255 // the instruction count for all transforms.
5257 // Handle size-changing casts.
5258 SDValue X = N0.getOperand(0);
5259 SDValue Y = N1.getOperand(0);
5260 EVT XVT = X.getValueType();
5262 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5263 HandOpcode == ISD::SIGN_EXTEND) {
5264 // If both operands have other uses, this transform would create extra
5265 // instructions without eliminating anything.
5266 if (!N0.hasOneUse() && !N1.hasOneUse())
5268 // We need matching integer source types.
5269 if (XVT != Y.getValueType())
5271 // Don't create an illegal op during or after legalization. Don't ever
5272 // create an unsupported vector op.
5273 if ((VT.isVector() || LegalOperations) &&
5274 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5276 // Avoid infinite looping with PromoteIntBinOp.
5277 // TODO: Should we apply desirable/legal constraints to all opcodes?
5278 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5279 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5281 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5282 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5283 return DAG.getNode(HandOpcode, DL, VT, Logic);
5286 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5287 if (HandOpcode == ISD::TRUNCATE) {
5288 // If both operands have other uses, this transform would create extra
5289 // instructions without eliminating anything.
5290 if (!N0.hasOneUse() && !N1.hasOneUse())
5292 // We need matching source types.
5293 if (XVT != Y.getValueType())
5295 // Don't create an illegal op during or after legalization.
5296 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5298 // Be extra careful sinking truncate. If it's free, there's no benefit in
5299 // widening a binop. Also, don't create a logic op on an illegal type.
5300 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5302 if (!TLI.isTypeLegal(XVT))
5304 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5305 return DAG.getNode(HandOpcode, DL, VT, Logic);
5308 // For binops SHL/SRL/SRA/AND:
5309 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5310 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5311 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5312 N0.getOperand(1) == N1.getOperand(1)) {
5313 // If either operand has other uses, this transform is not an improvement.
5314 if (!N0.hasOneUse() || !N1.hasOneUse())
5316 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5317 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5320 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5321 if (HandOpcode == ISD::BSWAP) {
5322 // If either operand has other uses, this transform is not an improvement.
5323 if (!N0.hasOneUse() || !N1.hasOneUse())
5325 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5326 return DAG.getNode(HandOpcode, DL, VT, Logic);
5329 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5330 // Only perform this optimization up until type legalization, before
5331 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
5332 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5333 // we don't want to undo this promotion.
5334 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5336 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5337 Level <= AfterLegalizeTypes) {
5338 // Input types must be integer and the same.
5339 if (XVT.isInteger() && XVT == Y.getValueType() &&
5340 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5341 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5342 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5343 return DAG.getNode(HandOpcode, DL, VT, Logic);
5347 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5348 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5349 // If both shuffles use the same mask, and both shuffle within a single
5350 // vector, then it is worthwhile to move the swizzle after the operation.
5351 // The type-legalizer generates this pattern when loading illegal
5352 // vector types from memory. In many cases this allows additional shuffle
5354 // There are other cases where moving the shuffle after the xor/and/or
5355 // is profitable even if shuffles don't perform a swizzle.
5356 // If both shuffles use the same mask, and both shuffles have the same first
5357 // or second operand, then it might still be profitable to move the shuffle
5358 // after the xor/and/or operation.
5359 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5360 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5361 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5362 assert(X.getValueType() == Y.getValueType() &&
5363 "Inputs to shuffles are not the same type");
5365 // Check that both shuffles use the same mask. The masks are known to be of
5366 // the same length because the result vector type is the same.
5367 // Check also that shuffles have only one use to avoid introducing extra
5369 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5370 !SVN0->getMask().equals(SVN1->getMask()))
5373 // Don't try to fold this node if it requires introducing a
5374 // build vector of all zeros that might be illegal at this stage.
5375 SDValue ShOp = N0.getOperand(1);
5376 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5377 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5379 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5380 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5381 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5382 N0.getOperand(0), N1.getOperand(0));
5383 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5386 // Don't try to fold this node if it requires introducing a
5387 // build vector of all zeros that might be illegal at this stage.
5388 ShOp = N0.getOperand(0);
5389 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5390 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5392 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5393 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5394 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5396 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5403 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
// Fold and/or of two setcc-equivalent nodes into a single comparison:
// merge zero/-1 sign-bit tests via OR/AND of the operands, turn a pair of
// not-equal tests against 0 and -1 into an unsigned range check, convert
// matching-predicate compares to xor/or bitwise logic when the target
// allows, fold compares against constants differing by one bit, and merge
// two compares of the same operands via setcc condition-code algebra.
// NOTE(review): several early returns/guards are elided where the embedded
// line numbers jump (e.g. 5408->5411, 5529->5533); code kept byte-identical.
5404 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5406 SDValue LL, LR, RL, RR, N0CC, N1CC;
5407 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5408 !isSetCCEquivalent(N1, RL, RR, N1CC))
5411 assert(N0.getValueType() == N1.getValueType() &&
5412 "Unexpected operand types for bitwise logic op");
5413 assert(LL.getValueType() == LR.getValueType() &&
5414 RL.getValueType() == RR.getValueType() &&
5415 "Unexpected operand types for setcc");
5417 // If we're here post-legalization or the logic op type is not i1, the logic
5418 // op type must match a setcc result type. Also, all folds require new
5419 // operations on the left and right operands, so those types must match.
5420 EVT VT = N0.getValueType();
5421 EVT OpVT = LL.getValueType();
5422 if (LegalOperations || VT.getScalarType() != MVT::i1)
5423 if (VT != getSetCCResultType(OpVT))
5425 if (OpVT != RL.getValueType())
5428 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5429 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5430 bool IsInteger = OpVT.isInteger();
5431 if (LR == RR && CC0 == CC1 && IsInteger) {
5432 bool IsZero = isNullOrNullSplat(LR);
5433 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5436 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5437 // All sign bits clear?
5438 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5440 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5441 // Any sign bits set?
5442 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5444 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5445 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5446 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5447 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5448 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5449 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5450 AddToWorklist(Or.getNode());
5451 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5455 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5456 // All sign bits set?
5457 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5459 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5460 // Any sign bits clear?
5461 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5463 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5464 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5465 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5466 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5467 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5468 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5469 AddToWorklist(And.getNode());
5470 return DAG.getSetCC(DL, VT, And, LR, CC1);
5474 // TODO: What is the 'or' equivalent of this fold?
5475 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
// X not in {0, -1} iff X+1 not in {1, 0} iff (unsigned) X+1 >= 2.
// Needs >1-bit types so that 0 and -1 are distinct values.
5476 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5477 IsInteger && CC0 == ISD::SETNE &&
5478 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5479 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5480 SDValue One = DAG.getConstant(1, DL, OpVT);
5481 SDValue Two = DAG.getConstant(2, DL, OpVT);
5482 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5483 AddToWorklist(Add.getNode());
5484 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5487 // Try more general transforms if the predicates match and the only user of
5488 // the compares is the 'and' or 'or'.
5489 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5490 N0.hasOneUse() && N1.hasOneUse()) {
5491 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5492 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5493 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5494 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5495 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5496 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5497 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5498 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5501 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5502 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5503 // Match a shared variable operand and 2 non-opaque constant operands.
5504 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5505 // The difference of the constants must be a single bit.
5507 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5509 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5510 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5512 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5513 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5514 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5515 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5516 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5517 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5518 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5519 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5520 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5521 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5522 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5527 // Canonicalize equivalent operands to LL == RL.
5528 if (LL == RR && LR == RL) {
5529 CC1 = ISD::getSetCCSwappedOperands(CC1);
5533 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5534 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5535 if (LL == RL && LR == RR) {
5536 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5537 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5538 if (NewCC != ISD::SETCC_INVALID &&
5539 (!LegalOperations ||
5540 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5541 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5542 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5548 /// This contains all DAGCombine rules which reduce two values combined by
5549 /// an And operation to a single value. This makes them reusable in the context
5550 /// of visitSELECT(). Rules involving constants are not included as
5551 /// visitSELECT() already handles those cases.
// AND-like folds shared between visitAND and visitSELECT: fold undef
// operands to 0, delegate to foldLogicOfSetCCs, rework (and (add x, c1),
// (lshr y, c2)) so the add immediate is legal, and narrow a low-half bit
// extract (and (srl x, K), mask) to the half-width type when profitable.
// NOTE(review): several guards and closing braces are elided where the
// embedded line numbers jump (e.g. 5579->5581, 5604->5606); code kept
// byte-identical.
5552 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5553 EVT VT = N1.getValueType();
5556 // fold (and x, undef) -> 0
5557 if (N0.isUndef() || N1.isUndef())
5558 return DAG.getConstant(0, DL, VT);
5560 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5563 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5564 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5565 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5566 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5567 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5568 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5569 // immediate for an add, but it is legal if its top c2 bits are set,
5570 // transform the ADD so the immediate doesn't need to be materialized
5572 APInt ADDC = ADDI->getAPIntValue();
5573 APInt SRLC = SRLI->getAPIntValue();
5574 if (ADDC.getMinSignedBits() <= 64 &&
5575 SRLC.ult(VT.getSizeInBits()) &&
5576 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5577 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5578 SRLC.getZExtValue());
// Only mutate the immediate if the masked-off high bits are provably
// irrelevant to the AND's result.
5579 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5581 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5584 DAG.getNode(ISD::ADD, DL0, VT,
5585 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5586 CombineTo(N0.getNode(), NewAdd);
5587 // Return N so it doesn't get rechecked!
5588 return SDValue(N, 0);
5596 // Reduce bit extract of low half of an integer to the narrower type.
5597 // (and (srl i64:x, K), KMask) ->
5598 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5599 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5600 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5601 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5602 unsigned Size = VT.getSizeInBits();
5603 const APInt &AndMask = CAnd->getAPIntValue();
5604 unsigned ShiftBits = CShift->getZExtValue();
5606 // Bail out, this node will probably disappear anyway.
5610 unsigned MaskBits = AndMask.countTrailingOnes();
5611 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5613 if (AndMask.isMask() &&
5614 // Required bits must not span the two halves of the integer and
5615 // must fit in the half size type.
5616 (ShiftBits + MaskBits <= Size / 2) &&
5617 TLI.isNarrowingProfitable(VT, HalfVT) &&
5618 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5619 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5620 TLI.isTruncateFree(VT, HalfVT) &&
5621 TLI.isZExtFree(HalfVT, VT)) {
5622 // The isNarrowingProfitable is to avoid regressions on PPC and
5623 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5624 // on downstream users of this. Those patterns could probably be
5625 // extended to handle extensions mixed in.
5628 assert(MaskBits <= Size);
5630 // Extracting the highest bit of the low half.
5631 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5632 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5635 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5636 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5637 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5638 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5639 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
// Decide whether (and (load x), AndC) can be implemented as a zero-extending
// load. AndC must be a low-bit mask; on success ExtVT is set to the narrow
// integer type covering exactly the preserved bits.
5648 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5649 EVT LoadResultTy, EVT &ExtVT) {
// The AND constant must be a contiguous run of low set bits (0..01..1);
// anything else cannot be expressed as a zext of a narrower load.
5650 if (!AndC->getAPIntValue().isMask())
// Number of low bits the mask keeps; this defines the narrow type width.
5653 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5655 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5656 EVT LoadedVT = LoadN->getMemoryVT();
// If the mask exactly covers the loaded type, no narrowing is needed -- a
// same-width ZEXTLOAD suffices, subject only to legality after legalization.
5658 if (ExtVT == LoadedVT &&
5659 (!LegalOperations ||
5660 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5661 // ZEXTLOAD will match without needing to change the size of the value being
// Beyond this point we would actually shrink the load, which is only valid
// for simple (non-volatile, non-atomic) loads.
5666 // Do not change the width of a volatile or atomic loads.
5667 if (!LoadN->isSimple())
5670 // Do not generate loads of non-round integer types since these can
5671 // be expensive (and would be wrong if the type is not byte sized).
// The narrow type must be strictly smaller than what was loaded and must be
// a round (power-of-two byte sized) type.
5672 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
// After legalization the narrow ZEXTLOAD itself must be legal for the target.
5675 if (LegalOperations &&
5676 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
// Finally give the target a chance to veto the width reduction.
5679 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
// Check whether it is legal to narrow the memory access of LDST (a load or a
// store) to MemVT, where the narrow value sits ShAmt bits into the original
// access. Shared legality gate for the load/store width-reduction combines.
5685 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5686 ISD::LoadExtType ExtType, EVT &MemVT,
5690 // Only allow byte offsets.
5694 // Do not generate loads of non-round integer types since these can
5695 // be expensive (and would be wrong if the type is not byte sized).
5696 if (!MemVT.isRound())
5699 // Don't change the width of a volatile or atomic loads.
5700 if (!LDST->isSimple())
5703 EVT LdStMemVT = LDST->getMemoryVT();
5705 // Bail out when changing the scalable property, since we can't be sure that
5706 // we're actually narrowing here.
5707 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5710 // Verify that we are actually reducing a load width here.
5711 if (LdStMemVT.bitsLT(MemVT))
5714 // Ensure that this isn't going to produce an unsupported memory access.
// The offset is a whole number of bytes, so the narrow access's alignment is
// the common alignment of the original access and that byte offset.
5716 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5717 const unsigned ByteShAmt = ShAmt / 8;
5718 const Align LDSTAlign = LDST->getAlign();
5719 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
// Ask the target whether MemVT at this alignment/address space is allowed.
5720 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5721 LDST->getAddressSpace(), NarrowAlign,
5722 LDST->getMemOperand()->getFlags()))
5726 // It's not possible to generate a constant of extended or untyped type.
5727 EVT PtrType = LDST->getBasePtr().getValueType();
5728 if (PtrType == MVT::Untyped || PtrType.isExtended())
// Load-specific checks.
5731 if (isa<LoadSDNode>(LDST)) {
5732 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5733 // Don't transform one with multiple uses, this would require adding a new
5735 if (!SDValue(Load, 0).hasOneUse())
// The extending load of MemVT must be legal once we are past legalization.
5738 if (LegalOperations &&
5739 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5742 // For the transform to be legal, the load must produce only two values
5743 // (the value loaded and the chain). Don't transform a pre-increment
5744 // load, for example, which produces an extra value. Otherwise the
5745 // transformation is not equivalent, and the downstream logic to replace
5746 // uses gets things wrong.
5747 if (Load->getNumValues() > 2)
5750 // If the load that we're shrinking is an extload and we're not just
5751 // discarding the extension we can't simply shrink the load. Bail.
5752 // TODO: It would be possible to merge the extensions in some cases.
5753 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5754 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
// Target veto point for load shrinking.
5757 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
// Store-specific checks (the only other LSBaseSDNode kind).
5760 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5761 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5762 // Can't write outside the original store
5763 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
// The truncating store of MemVT must be legal once we are past legalization.
5766 if (LegalOperations &&
5767 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
// Recursively walk the operands of N (an expression tree under an AND with a
// constant mask) collecting loads that can be narrowed to the masked width.
// Records constants that will need re-masking in NodesWithConsts, and allows
// at most one other node (returned via NodeToMask) to be masked explicitly.
5773 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5774 SmallVectorImpl<LoadSDNode*> &Loads,
5775 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5776 ConstantSDNode *Mask,
5777 SDNode *&NodeToMask) {
5778 // Recursively search for the operands, looking for loads which can be
// Vector operands are not handled by this scalar-load narrowing search.
5780 for (SDValue Op : N->op_values()) {
5781 if (Op.getValueType().isVector())
5784 // Some constants may need fixing up later if they are too large.
// For OR/XOR, a constant with bits outside the mask would change the result
// once the mask is pushed down, so remember this node for constant narrowing.
5785 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5786 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5787 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5788 NodesWithConsts.insert(N);
// Every interior node must have a single use or the rewrite would affect
// other users of the subtree.
5792 if (!Op.hasOneUse())
5795 switch(Op.getOpcode()) {
5797 auto *Load = cast<LoadSDNode>(Op);
// A load qualifies if the mask admits a zextload and the narrowing is legal.
5799 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5800 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5802 // ZEXTLOAD is already small enough.
5803 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5804 ExtVT.bitsGE(Load->getMemoryVT()))
5807 // Use LE to convert equal sized loads to zext.
5808 if (ExtVT.bitsLE(Load->getMemoryVT()))
5809 Loads.push_back(Load)
5815 case ISD::ZERO_EXTEND:
5816 case ISD::AssertZext: {
// Zero-extending nodes already guarantee the high bits are zero; they are
// acceptable when the mask is at least as wide as the pre-extension type.
5817 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5818 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5819 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5820 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5821 Op.getOperand(0).getValueType();
5823 // We can accept extending nodes if the mask is wider or an equal
5824 // width to the original type.
5825 if (ExtVT.bitsGE(VT))
// Recurse into other (logic) nodes.
5832 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5838 // Allow one node which will masked along with any loads found.
5842 // Also ensure that the node to be masked only produces one data result.
5843 NodeToMask = Op.getNode();
5844 if (NodeToMask->getNumValues() > 1) {
// Multi-result nodes are only acceptable if exactly one result carries data
// (the rest being Glue/Other); otherwise masking it is ambiguous.
5845 bool HasValue = false;
5846 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5847 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5848 if (VT != MVT::Glue && VT != MVT::Other) {
5850 NodeToMask = nullptr;
5856 assert(HasValue && "Node to be masked has no data result?");
// Given an AND of an expression tree with a constant low-bit mask, try to push
// the mask back up to the leaves: narrow the leaf loads, re-mask interior
// constants, and then drop the AND entirely. Returns true if the DAG changed.
5862 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5863 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
// Only contiguous low-bit masks can be absorbed into zero-extending loads.
5867 if (!Mask->getAPIntValue().isMask())
5870 // No need to do anything if the and directly uses a load.
// (That simple case is handled by reduceLoadWidth elsewhere.)
5871 if (isa<LoadSDNode>(N->getOperand(0)))
5874 SmallVector<LoadSDNode*, 8> Loads;
5875 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5876 SDNode *FixupNode = nullptr;
5877 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
// Without any loads to narrow the transform buys nothing.
5878 if (Loads.size() == 0)
5881 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5882 SDValue MaskOp = N->getOperand(1);
5884 // If it exists, fixup the single node we allow in the tree that needs
5887 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5888 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5889 FixupNode->getValueType(0),
5890 SDValue(FixupNode, 0), MaskOp);
5891 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
// RAUW also rewrote the new AND's own operand to itself; restore it so the
// AND still masks the original fixup node rather than itself.
5892 if (And.getOpcode() == ISD ::AND)
5893 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5896 // Narrow any constants that need it.
5897 for (auto *LogicN : NodesWithConsts) {
5898 SDValue Op0 = LogicN->getOperand(0);
5899 SDValue Op1 = LogicN->getOperand(1);
// Canonicalize so the constant ends up in Op1 before masking it.
5901 if (isa<ConstantSDNode>(Op0))
5902 std::swap(Op0, Op1);
5904 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5907 DAG.UpdateNodeOperands(LogicN, Op0, And);
5910 // Create narrow loads.
5911 for (auto *Load : Loads) {
5912 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5913 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5914 SDValue(Load, 0), MaskOp);
5915 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
// Same self-reference fixup as above, then shrink the load through the
// freshly inserted AND.
5916 if (And.getOpcode() == ISD ::AND)
5918 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5919 SDValue NewLoad = reduceLoadWidth(And.getNode());
5921 "Shouldn't be masking the load if it can't be narrowed");
5922 CombineTo(Load, NewLoad, NewLoad.getValue(1));
// All leaves now produce correctly masked values, so the outer AND is
// redundant and N can be replaced by its unmasked operand.
5924 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5931 // x & (-1 'logical shift' y)
5933 // (x 'opposite logical shift' y) 'logical shift' y
5934 // if it is better for performance.
// Rewrites a mask formed by shifting all-ones into a pair of opposing shifts
// that clear the same bits, when the target says a shift pair is preferable.
5935 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5936 assert(N->getOpcode() == ISD::AND);
5938 SDValue N0 = N->getOperand(0);
5939 SDValue N1 = N->getOperand(1);
5941 // Do we actually prefer shifts over mask?
5942 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5945 // Try to match (-1 '[outer] logical shift' y)
5946 unsigned OuterShift;
5947 unsigned InnerShift; // The opposite direction to the OuterShift.
5948 SDValue Y; // Shift amount.
// Matches M = (shl/srl -1, Y); records the outer shift opcode, its logical
// opposite, and the shift amount Y.
5949 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5952 OuterShift = M->getOpcode();
5953 if (OuterShift == ISD::SHL)
5954 InnerShift = ISD::SRL;
5955 else if (OuterShift == ISD::SRL)
5956 InnerShift = ISD::SHL;
// The shifted value must be all-ones for the result to be a pure mask.
5959 if (!isAllOnesConstant(M->getOperand(0)))
5961 Y = M->getOperand(1);
// The mask may be on either side of the AND.
5968 else if (matchMask(N0))
5974 EVT VT = N->getValueType(0);
5976 // tmp = x 'opposite logical shift' y
5977 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5978 // ret = tmp 'logical shift' y
5979 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5984 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5985 /// For a target with a bit test, this is expected to become test + set and save
5986 /// at least 1 instruction.
5987 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5988 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5990 // This is probably not worthwhile without a supported type.
5991 EVT VT = And->getValueType(0);
5992 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5993 if (!TLI.isTypeLegal(VT))
5996 // Look through an optional extension.
5997 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
5998 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
5999 And0 = And0.getOperand(0);
// The pattern only fires when masking down to a single low bit.
6000 if (!isOneConstant(And1) || !And0.hasOneUse())
6005 // Attempt to find a 'not' op.
6006 // TODO: Should we favor test+set even without the 'not' op?
6007 bool FoundNot = false;
6008 if (isBitwiseNot(Src)) {
6010 Src = Src.getOperand(0);
6012 // Look though an optional truncation. The source operand may not be the
6013 // same type as the original 'and', but that is ok because we are masking
6014 // off everything but the low bit.
6015 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6016 Src = Src.getOperand(0);
6019 // Match a shift-right by constant.
6020 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6023 // We might have looked through casts that make this transform invalid.
6024 // TODO: If the source type is wider than the result type, do the mask and
6025 // compare in the source type.
// The shift amount must be a constant within the result's bit width, or the
// 1<<C mask built below would be meaningless.
6026 unsigned VTBitWidth = VT.getScalarSizeInBits();
6027 SDValue ShiftAmt = Src.getOperand(1);
6028 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6029 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth))
6032 // Set source to shift source.
6033 Src = Src.getOperand(0);
6035 // Try again to find a 'not' op.
6036 // TODO: Should we favor test+set even with two 'not' ops?
// The 'not' may sit either above or below the shift; either placement works.
6038 if (!isBitwiseNot(Src))
6040 Src = Src.getOperand(0);
// Only profitable on targets with a real bit-test instruction.
6043 if (!TLI.hasBitTest(Src, ShiftAmt))
6046 // Turn this into a bit-test pattern using mask op + setcc:
6047 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6048 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6050 SDValue X = DAG.getZExtOrTrunc(Src, DL, VT);
6051 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6052 SDValue Mask = DAG.getConstant(
6053 APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT);
6054 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
6055 SDValue Zero = DAG.getConstant(0, DL, VT);
6056 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
// The setcc result type may differ from VT; normalize back to VT.
6057 return DAG.getZExtOrTrunc(Setcc, DL, VT);
6060 /// For targets that support usubsat, match a bit-hack form of that operation
6061 /// that ends in 'and' and convert it.
6062 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
6063 SDValue N0 = N->getOperand(0);
6064 SDValue N1 = N->getOperand(1);
6065 EVT VT = N1.getValueType();
6067 // Canonicalize SRA as operand 1.
6068 if (N0.getOpcode() == ISD::SRA)
6071 // xor/add with SMIN (signmask) are logically equivalent.
6072 if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
// Both halves of the AND must be one-use and derived from the same value X:
// (X ^/+ signmask) & (X s>> bw-1).
6075 if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
6076 N0.getOperand(0) != N1.getOperand(0))
// The xor/add constant must be exactly the sign mask, and the arithmetic
// shift must replicate the sign bit across the full width (shift by bw-1).
6079 unsigned BitWidth = VT.getScalarSizeInBits();
6080 ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6081 ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6082 if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6083 !SraC || SraC->getAPIntValue() != BitWidth - 1)
6086 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6087 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6089 SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6090 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6093 /// Given a bitwise logic operation N with a matching bitwise logic operand,
6094 /// fold a pattern where 2 of the source operands are identically shifted
6095 /// values. For example:
6096 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6097 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6098 SelectionDAG &DAG) {
6099 unsigned LogicOpcode = N->getOpcode();
6100 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
6101 LogicOpcode == ISD::XOR)
6102 && "Expected bitwise logic operation");
// Both intermediate nodes must be single-use or the fold adds nodes without
// removing any.
6104 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6107 // Match another bitwise logic op and a shift.
// The inner logic op must match N's opcode, and ShiftOp must be one of the
// three shift kinds; the same fold is valid for shl, srl and sra.
6108 unsigned ShiftOpcode = ShiftOp.getOpcode();
6109 if (LogicOp.getOpcode() != LogicOpcode ||
6110 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6111 ShiftOpcode == ISD::SRA))
6114 // Match another shift op inside the first logic operand. Handle both commuted
6116 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6117 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6118 SDValue X1 = ShiftOp.getOperand(0);
6119 SDValue Y = ShiftOp.getOperand(1);
// Both shifts must use the exact same opcode and amount Y for the rewrite to
// be an identity.
6121 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6122 LogicOp.getOperand(0).getOperand(1) == Y) {
6123 X0 = LogicOp.getOperand(0).getOperand(0);
6124 Z = LogicOp.getOperand(1);
6125 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6126 LogicOp.getOperand(1).getOperand(1) == Y) {
6127 X0 = LogicOp.getOperand(1).getOperand(0);
6128 Z = LogicOp.getOperand(0);
// Build LOGIC(SH(LOGIC(X0, X1), Y), Z): one shift instead of two.
6133 EVT VT = N->getValueType(0);
6135 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6136 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6137 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
// Main combine entry point for ISD::AND nodes. Tries, in order: constant
// folding and canonicalization, vector-specific folds, load narrowing,
// masked-load/gather zero-extension, backwards mask propagation, shift/logic
// folds, bswap matching, and bit-test/usubsat pattern matching. Returns the
// replacement value, or SDValue() (or SDValue(N, 0) after in-place CombineTo)
// per the usual DAG-combine protocol.
6140 SDValue DAGCombiner::visitAND(SDNode *N) {
6141 SDValue N0 = N->getOperand(0);
6142 SDValue N1 = N->getOperand(1);
6143 EVT VT = N1.getValueType();
6149 // fold (and c1, c2) -> c1&c2
6150 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6153 // canonicalize constant to RHS
6154 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6155 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6156 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// Vector-only folds.
6159 if (VT.isVector()) {
6160 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6163 // fold (and x, 0) -> 0, vector edition
6164 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6165 // do not return N1, because undef node may exist in N1
6166 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6167 SDLoc(N), N1.getValueType());
6169 // fold (and x, -1) -> x, vector edition
6170 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6173 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6174 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6175 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6176 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
6177 Splat && N1.hasOneUse()) {
6178 EVT LoadVT = MLoad->getMemoryVT();
6180 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6181 // For this AND to be a zero extension of the masked load the elements
6182 // of the BuildVec must mask the bottom bits of the extended element
6184 uint64_t ElementSize =
6185 LoadVT.getVectorElementType().getScalarSizeInBits();
6186 if (Splat->getAPIntValue().isMask(ElementSize)) {
6187 return DAG.getMaskedLoad(
6188 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6189 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6190 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6191 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6197 // fold (and x, -1) -> x
6198 if (isAllOnesConstant(N1))
6201 // if (and x, c) is known to be zero, return 0
6202 unsigned BitWidth = VT.getScalarSizeInBits();
6203 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6204 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6205 return DAG.getConstant(0, SDLoc(N), VT);
6207 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Reassociate (and (and x, c1), c2) style chains.
6211 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
6214 // Try to convert a constant mask AND into a shuffle clear mask.
6216 if (SDValue Shuffle = XformToShuffleWithZero(N))
6219 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6222 // fold (and (or x, C), D) -> D if (C & D) == D
6223 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6224 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6226 if (N0.getOpcode() == ISD::OR &&
6227 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6229 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6230 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6231 SDValue N0Op0 = N0.getOperand(0);
// Mask holds the bits the AND clears, truncated to the pre-extension width;
// if V already has those bits zero, the any_extend is effectively a zext.
6232 APInt Mask = ~N1C->getAPIntValue();
6233 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
6234 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
6235 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
6236 N0.getValueType(), N0Op0);
6238 // Replace uses of the AND with uses of the Zero extend node.
6241 // We actually want to replace all uses of the any_extend with the
6242 // zero_extend, to avoid duplicating things. This will later cause this
6243 // AND to be folded.
6244 CombineTo(N0.getNode(), Zext);
6245 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6249 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6250 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6251 // already be zero by virtue of the width of the base type of the load.
6253 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6255 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6256 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6257 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6258 N0.getOperand(0).getResNo() == 0) ||
6259 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6260 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6261 N0 : N0.getOperand(0) );
6263 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6264 // This can be a pure constant or a vector splat, in which case we treat the
6265 // vector as a scalar and use the splat value.
6266 APInt Constant = APInt::getZero(1);
6267 if (const ConstantSDNode *C = isConstOrConstSplat(
6268 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6269 Constant = C->getAPIntValue();
6270 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6271 APInt SplatValue, SplatUndef;
6272 unsigned SplatBitSize;
6274 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6275 SplatBitSize, HasAnyUndefs);
6277 // Undef bits can contribute to a possible optimisation if set, so
6279 SplatValue |= SplatUndef;
6281 // The splat value may be something like "0x00FFFFFF", which means 0 for
6282 // the first vector value and FF for the rest, repeating. We need a mask
6283 // that will apply equally to all members of the vector, so AND all the
6284 // lanes of the constant together.
6285 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6287 // If the splat value has been compressed to a bitlength lower
6288 // than the size of the vector lane, we need to re-expand it to
6290 if (EltBitWidth > SplatBitSize)
6291 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6292 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6293 SplatValue |= SplatValue.shl(SplatBitSize);
6295 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6296 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6297 if ((SplatBitSize % EltBitWidth) == 0) {
6298 Constant = APInt::getAllOnes(EltBitWidth);
6299 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6300 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6305 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6306 // actually legal and isn't going to get expanded, else this is a false
6308 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6309 Load->getValueType(0),
6310 Load->getMemoryVT());
6312 // Resize the constant to the same size as the original memory access before
6313 // extension. If it is still the AllOnesValue then this AND is completely
6315 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
// B records whether the AND is redundant for this extension kind.
6318 switch (Load->getExtensionType()) {
6319 default: B = false; break;
6320 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6322 case ISD::NON_EXTLOAD: B = true; break;
6325 if (B && Constant.isAllOnes()) {
6326 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6327 // preserve semantics once we get rid of the AND.
6328 SDValue NewLoad(Load, 0);
6330 // Fold the AND away. NewLoad may get replaced immediately.
6331 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6333 if (Load->getExtensionType() == ISD::EXTLOAD) {
6334 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6335 Load->getValueType(0), SDLoc(Load),
6336 Load->getChain(), Load->getBasePtr(),
6337 Load->getOffset(), Load->getMemoryVT(),
6338 Load->getMemOperand());
6339 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6340 if (Load->getNumValues() == 3) {
6341 // PRE/POST_INC loads have 3 values.
6342 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6343 NewLoad.getValue(2) };
6344 CombineTo(Load, To, 3, true);
6346 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6350 return SDValue(N, 0); // Return N so it doesn't get rechecked!
// (and (extract_subvector (ext v)) low-mask) -> extract_subvector of a zext,
// when the mask exactly covers the pre-extension scalar width.
6354 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
6355 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
6356 SDValue Ext = N0.getOperand(0);
6357 EVT ExtVT = Ext->getValueType(0);
6358 SDValue Extendee = Ext->getOperand(0);
6360 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
6361 if (N1C->getAPIntValue().isMask(ScalarWidth)) {
6362 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
6363 // => (extract_subvector (iN_zeroext v))
6364 SDValue ZeroExtExtendee =
6365 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
6367 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
6372 // fold (and (masked_gather x)) -> (zext_masked_gather x)
6373 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6374 EVT MemVT = GN0->getMemoryVT();
6375 EVT ScalarVT = MemVT.getScalarType();
6377 if (SDValue(GN0, 0).hasOneUse() &&
6378 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6379 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
6380 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6381 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6383 SDValue ZExtLoad = DAG.getMaskedGather(
6384 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6385 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6387 CombineTo(N, ZExtLoad);
6388 AddToWorklist(ZExtLoad.getNode());
6389 // Avoid recheck of N.
6390 return SDValue(N, 0);
6394 // fold (and (load x), 255) -> (zextload x, i8)
6395 // fold (and (extload x, i16), 255) -> (zextload x, i8)
6396 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
6397 if (SDValue Res = reduceLoadWidth(N))
6401 // Attempt to propagate the AND back up to the leaves which, if they're
6402 // loads, can be combined to narrow loads and the AND node can be removed.
6403 // Perform after legalization so that extend nodes will already be
6404 // combined into the loads.
6405 if (BackwardsPropagateMask(N))
6406 return SDValue(N, 0);
6409 if (SDValue Combined = visitANDLike(N0, N1, N))
6412 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6413 if (N0.getOpcode() == N1.getOpcode())
6414 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
// foldLogicOfShifts is tried with both operand orders.
6417 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6419 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
6422 // Masking the negated extension of a boolean is just the zero-extended
6424 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6425 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6427 // Note: the SimplifyDemandedBits fold below can make an information-losing
6428 // transform, and then we have no way to find this better fold.
6429 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6430 if (isNullOrNullSplat(N0.getOperand(0))) {
6431 SDValue SubRHS = N0.getOperand(1);
6432 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6433 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6435 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6436 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6437 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6441 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6442 // fold (and (sra)) -> (and (srl)) when possible.
6443 if (SimplifyDemandedBits(SDValue(N, 0)))
6444 return SDValue(N, 0);
6446 // fold (zext_inreg (extload x)) -> (zextload x)
6447 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6448 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6449 (ISD::isEXTLoad(N0.getNode()) ||
6450 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6451 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6452 EVT MemVT = LN0->getMemoryVT();
6453 // If we zero all the possible extended bits, then we can turn this into
6454 // a zextload if we are running before legalize or the operation is legal.
6455 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6456 unsigned MemBitSize = MemVT.getScalarSizeInBits();
6457 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6458 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6459 ((!LegalOperations && LN0->isSimple()) ||
6460 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6462 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6463 LN0->getBasePtr(), MemVT, LN0->getMemOperand())
6465 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6466 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6470 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6471 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6472 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6473 N0.getOperand(1), false))
6477 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6480 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6483 // Recognize the following pattern:
6485 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6487 // where bitmask is a mask that clears the upper bits of AndVT. The
6488 // number of bits in bitmask must be a power of two.
6489 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6490 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6493 auto *C = dyn_cast<ConstantSDNode>(RHS);
6497 if (!C->getAPIntValue().isMask(
6498 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6504 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6505 if (IsAndZeroExtMask(N0, N1))
6506 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6508 if (hasOperation(ISD::USUBSAT, VT))
6509 if (SDValue V = foldAndToUsubsat(N, DAG))
6515 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
6516 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6517 bool DemandHighBits) {
// Only attempted after operation legalization, on scalar i16/i32/i64 where
// the target supports BSWAP.
6518 if (!LegalOperations)
6521 EVT VT = N->getValueType(0);
6522 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6524 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6527 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
// LookPassAnd0/1 record that a masking AND was peeled off N0/N1, which
// matters later when proving the high bits are zero.
6528 bool LookPassAnd0 = false;
6529 bool LookPassAnd1 = false;
6530 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6532 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6534 if (N0.getOpcode() == ISD::AND) {
6535 if (!N0->hasOneUse())
6537 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6538 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6539 // This is needed for X86.
6540 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6541 N01C->getZExtValue() != 0xFFFF))
6543 N0 = N0.getOperand(0);
6544 LookPassAnd0 = true;
6547 if (N1.getOpcode() == ISD::AND) {
6548 if (!N1->hasOneUse())
6550 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6551 if (!N11C || N11C->getZExtValue() != 0xFF)
6553 N1 = N1.getOperand(0);
6554 LookPassAnd1 = true;
// Canonicalize so N0 is the SHL and N1 the SRL; both shift amounts must be 8.
6557 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6559 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6561 if (!N0->hasOneUse() || !N1->hasOneUse())
6564 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6565 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6568 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6571 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6572 SDValue N00 = N0->getOperand(0);
6573 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6574 if (!N00->hasOneUse())
6576 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6577 if (!N001C || N001C->getZExtValue() != 0xFF)
6579 N00 = N00.getOperand(0);
6580 LookPassAnd0 = true;
6583 SDValue N10 = N1->getOperand(0);
6584 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6585 if (!N10->hasOneUse())
6587 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6588 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6590 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6591 N101C->getZExtValue() != 0xFFFF))
6593 N10 = N10.getOperand(0);
6594 LookPassAnd1 = true;
6600 // Make sure everything beyond the low halfword gets set to zero since the SRL
6601 // 16 will clear the top bits.
6602 unsigned OpSizeInBits = VT.getSizeInBits();
6603 if (OpSizeInBits > 16) {
6604 // If the left-shift isn't masked out then the only way this is a bswap is
6605 // if all bits beyond the low 8 are 0. In that case the entire pattern
6606 // reduces to a left shift anyway: leave it for other parts of the combiner.
6607 if (DemandHighBits && !LookPassAnd0)
6610 // However, if the right shift isn't masked out then it might be because
6611 // it's not needed. See if we can spot that too. If the high bits aren't
6612 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
6613 // upper bits to be zero.
6614 if (!LookPassAnd1) {
6615 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
6616 if (!DAG.MaskedValueIsZero(N10,
6617 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
// Emit bswap of the source; for types wider than 16 bits the swapped
// halfword lands in the top, so shift it back down by width-16.
6622 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6623 if (OpSizeInBits > 16) {
6625 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6626 DAG.getConstant(OpSizeInBits - 16, DL,
6627 getShiftAmountTy(VT)));
6632 /// Return true if the specified node is an element that makes up a 32-bit
6633 /// packed halfword byteswap.
6634 /// ((x & 0x000000ff) << 8) |
6635 /// ((x & 0x0000ff00) >> 8) |
6636 /// ((x & 0x00ff0000) << 8) |
6637 /// ((x & 0xff000000) >> 8)
/// On a successful match, the source node for the matched byte is recorded in
/// Parts[MaskByteOffset] so the caller can check all four bytes come from the
/// same value. Returns false if the slot was already filled by another node.
6638 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
// Only fold single-use elements; otherwise the original and/shift survive and
// the transform just adds work.
6639 if (!N->hasOneUse())
// The element must be an AND, SHL, or SRL whose operand is also one of those
// three: each byte of the pattern is a (mask, shift-by-8) pair in either order.
6642 unsigned Opc = N.getOpcode();
6643 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6646 SDValue N0 = N.getOperand(0);
6647 unsigned Opc0 = N0.getOpcode();
6648 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
// Find the constant byte mask: either the RHS of this AND, or the RHS of the
// inner AND when the outer node is the shift.
6651 ConstantSDNode *N1C = nullptr;
6652 // SHL or SRL: look upstream for AND mask operand
6653 if (Opc == ISD::AND)
6654 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6655 else if (Opc0 == ISD::AND)
6656 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// Map the mask constant to the byte index (0 = LSB) it selects.
6660 unsigned MaskByteOffset;
6661 switch (N1C->getZExtValue()) {
6664 case 0xFF: MaskByteOffset = 0; break;
6665 case 0xFF00: MaskByteOffset = 1; break;
6667 // In case demanded bits didn't clear the bits that will be shifted out.
6668 // This is needed for X86.
6669 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6674 case 0xFF0000: MaskByteOffset = 2; break;
6675 case 0xFF000000: MaskByteOffset = 3; break;
// Now verify the shift direction/amount is consistent with the byte index:
// bytes 0 and 2 are shifted left by 8, bytes 1 and 3 are shifted right by 8,
// and the shift may sit either inside or outside the AND.
6678 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6679 if (Opc == ISD::AND) {
6680 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6682 // (x >> 8) & 0xff0000
6683 if (Opc0 != ISD::SRL)
6685 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6686 if (!C || C->getZExtValue() != 8)
6689 // (x << 8) & 0xff00
6690 // (x << 8) & 0xff000000
6691 if (Opc0 != ISD::SHL)
6693 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6694 if (!C || C->getZExtValue() != 8)
6697 } else if (Opc == ISD::SHL) {
6699 // (x & 0xff0000) << 8
6700 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6702 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6703 if (!C || C->getZExtValue() != 8)
6705 } else { // Opc == ISD::SRL
6706 // (x & 0xff00) >> 8
6707 // (x & 0xff000000) >> 8
6708 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6710 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6711 if (!C || C->getZExtValue() != 8)
// Each byte slot may only be claimed once across the whole pattern.
6715 if (Parts[MaskByteOffset])
// Record the ultimate source of this byte (the value below mask and shift).
6718 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6722 // Match 2 elements of a packed halfword bswap.
// Either an OR of two single-byte elements (filling two slots of Parts), or an
// already-formed (srl (bswap X), 16), which supplies both low-half bytes.
6723 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6724 if (N.getOpcode() == ISD::OR)
6725 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6726 isBSwapHWordElement(N.getOperand(1), Parts);
// (srl (bswap X), 16): the bswap's low halfword is X's high halfword, so a
// shift of exactly 16 stands in for both byte-0 and byte-1 elements.
6728 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6729 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6730 if (!C || C->getAPIntValue() != 16)
// Both low-half slots come from the bswap's input.
6732 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6739 // Match this pattern:
6740 // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6741 // And rewrite this to:
6742 // (rotr (bswap A), 16)
// Only fires for i32 (asserted) and only when ROTR is legal or custom, since
// the rewrite always produces a rotate.
6743 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6744 SelectionDAG &DAG, SDNode *N, SDValue N0,
6745 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6746 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6747 "MatchBSwapHWordOrAndAnd: expecting i32");
6748 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6750 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6752 // TODO: this is too restrictive; lifting this restriction requires more tests
6753 if (!N0->hasOneUse() || !N1->hasOneUse())
// The two AND masks must be exactly the alternating-byte pair; splat constants
// are accepted via isConstOrConstSplat.
6755 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6756 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6757 if (!Mask0 || !Mask1)
6759 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6760 Mask1->getAPIntValue() != 0x00ff00ff)
// Inside the masks: a left shift on the 0xff00ff00 side, a right shift on the
// 0x00ff00ff side, both by 8, and both of the same source value A.
6762 SDValue Shift0 = N0.getOperand(0);
6763 SDValue Shift1 = N1.getOperand(0);
6764 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6766 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6767 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6768 if (!ShiftAmt0 || !ShiftAmt1)
6770 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6772 if (Shift0.getOperand(0) != Shift1.getOperand(0))
// Matched: byte-swap A, then rotate right by 16 to swap within halfwords.
6776 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6777 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6778 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6781 /// Match a 32-bit packed halfword bswap. That is
6782 /// ((x & 0x000000ff) << 8) |
6783 /// ((x & 0x0000ff00) >> 8) |
6784 /// ((x & 0x00ff0000) << 8) |
6785 /// ((x & 0xff000000) >> 8)
6786 /// => (rotl (bswap x), 16)
/// N is the OR node being combined; N0/N1 are its operands. Returns the
/// replacement value or an empty SDValue if no match.
6787 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6788 if (!LegalOperations)
6791 EVT VT = N->getValueType(0);
// BSWAP itself must be available; every rewrite below produces one.
6794 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
// First try the compact and-of-shifts form, in both operand orders.
6797 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6798 getShiftAmountTy(VT)))
6801 // Try again with commuted operands.
6802 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6803 getShiftAmountTy(VT)))
// Otherwise look for the four single-byte elements distributed across the OR
// tree in any of these associations:
6808 // (or (bswaphpair), (bswaphpair))
6809 // (or (or (bswaphpair), (and)), (and))
6810 // (or (or (and), (bswaphpair)), (and))
// Parts[i] collects the source node of byte i; all four must agree.
6811 SDNode *Parts[4] = {};
6813 if (isBSwapHWordPair(N0, Parts)) {
6814 // (or (or (and), (and)), (or (and), (and)))
6815 if (!isBSwapHWordPair(N1, Parts))
6817 } else if (N0.getOpcode() == ISD::OR) {
6818 // (or (or (or (and), (and)), (and)), (and))
6819 if (!isBSwapHWordElement(N1, Parts))
6821 SDValue N00 = N0.getOperand(0);
6822 SDValue N01 = N0.getOperand(1);
6823 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6824 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6830 // Make sure the parts are all coming from the same node.
6831 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6835 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6836 SDValue(Parts[0], 0));
6838 // Result of the bswap should be rotated by 16. If it's not legal, then
6839 // do (x << 16) | (x >> 16).
// Note rotl-16 and rotr-16 are equivalent on i32, so either flavor works.
6840 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6841 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6842 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6843 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6844 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6845 return DAG.getNode(ISD::OR, DL, VT,
6846 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6847 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6850 /// This contains all DAGCombine rules which reduce two values combined by
6851 /// an Or operation to a single value \see visitANDLike().
/// N0 and N1 are the operands of the OR node N. Returns the simplified value
/// or an empty SDValue when no rule applies.
6852 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6853 EVT VT = N1.getValueType();
6856 // fold (or x, undef) -> -1
6857 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6858 return DAG.getAllOnesConstant(DL, VT);
// Try combining (setcc) | (setcc) into a single comparison.
6860 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6863 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6864 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6865 // Don't increase # computations.
6866 (N0->hasOneUse() || N1->hasOneUse())) {
6867 // We can only do this xform if we know that bits from X that are set in C2
6868 // but not in C1 are already zero. Likewise for Y.
// Opaque constants are skipped: they must not be folded into new constants.
6869 if (const ConstantSDNode *N0O1C =
6870 getAsNonOpaqueConstant(N0.getOperand(1))) {
6871 if (const ConstantSDNode *N1O1C =
6872 getAsNonOpaqueConstant(N1.getOperand(1))) {
6873 // We can only do this xform if we know that bits from X that are set in
6874 // C2 but not in C1 are already zero. Likewise for Y.
6875 const APInt &LHSMask = N0O1C->getAPIntValue();
6876 const APInt &RHSMask = N1O1C->getAPIntValue();
// MaskedValueIsZero proves the dropped mask bits can't change the result.
6878 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6879 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6880 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6881 N0.getOperand(0), N1.getOperand(0));
6882 return DAG.getNode(ISD::AND, DL, VT, X,
6883 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6889 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6890 if (N0.getOpcode() == ISD::AND &&
6891 N1.getOpcode() == ISD::AND &&
6892 N0.getOperand(0) == N1.getOperand(0) &&
6893 // Don't increase # computations.
6894 (N0->hasOneUse() || N1->hasOneUse())) {
6895 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6896 N0.getOperand(1), N1.getOperand(1));
6897 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6903 /// OR combines for which the commuted variant will be tried as well.
/// Caller (visitOR) invokes this twice, once with (N0, N1) and once with
/// (N1, N0), so each pattern here only needs to be written one way.
6904 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
6906 EVT VT = N0.getValueType();
6907 if (N0.getOpcode() == ISD::AND) {
6908 SDValue N00 = N0.getOperand(0);
6909 SDValue N01 = N0.getOperand(1);
6911 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6912 // TODO: Set AllowUndefs = true.
6913 if (getBitwiseNotOperand(N01, N00,
6914 /* AllowUndefs */ false) == N1)
6915 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
6917 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6918 if (getBitwiseNotOperand(N00, N01,
6919 /* AllowUndefs */ false) == N1)
6920 return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
6923 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
// Shift amounts of funnel shifts are often zero-extended; look through the
// extension when comparing them against a plain shift's amount.
6926 auto peekThroughZext = [](SDValue V) {
6927 if (V->getOpcode() == ISD::ZERO_EXTEND)
6928 return V->getOperand(0);
// The shl/srl half is redundant with the funnel shift; keep just the fsh.
6932 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
6933 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
6934 N0.getOperand(0) == N1.getOperand(0) &&
6935 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
6938 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
6939 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
6940 N0.getOperand(1) == N1.getOperand(0) &&
6941 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
/// Main combine entry point for ISD::OR nodes. Tries constant folding,
/// canonicalization, vector-specific folds, bswap/rotate/funnel-shift
/// recognition, load combining, and demanded-bits simplification, in order.
6947 SDValue DAGCombiner::visitOR(SDNode *N) {
6948 SDValue N0 = N->getOperand(0);
6949 SDValue N1 = N->getOperand(1);
6950 EVT VT = N1.getValueType();
6956 // fold (or c1, c2) -> c1|c2
6957 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6960 // canonicalize constant to RHS
6961 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6962 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6963 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// Vector-only folds come first so later scalar-style folds see a simpler DAG.
6966 if (VT.isVector()) {
6967 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6970 // fold (or x, 0) -> x, vector edition
6971 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6974 // fold (or x, -1) -> -1, vector edition
6975 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6976 // do not return N1, because undef node may exist in N1
6977 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6979 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6980 // Do this only if the resulting type / shuffle is legal.
6981 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
6982 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
6983 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
6984 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6985 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6986 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6987 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6988 // Ensure both shuffles have a zero input.
// XOR-style test: exactly one operand of each shuffle must be the zero vector.
6989 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6990 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6991 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6992 bool CanFold = true;
6993 int NumElts = VT.getVectorNumElements();
6994 SmallVector<int, 4> Mask(NumElts, -1);
// Build a combined mask: for each lane, exactly one side may contribute a
// non-zero element; the other must be zero (or undef).
6996 for (int i = 0; i != NumElts; ++i) {
6997 int M0 = SV0->getMaskElt(i);
6998 int M1 = SV1->getMaskElt(i);
7000 // Determine if either index is pointing to a zero vector.
7001 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7002 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7004 // If one element is zero and the otherside is undef, keep undef.
7005 // This also handles the case that both are undef.
7006 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7009 // Make sure only one of the elements is zero.
7010 if (M0Zero == M1Zero) {
7015 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7017 // We have a zero and non-zero element. If the non-zero came from
7018 // SV0 make the index a LHS index. If it came from SV1, make it
7019 // a RHS index. We need to mod by NumElts because we don't care
7020 // which operand it came from in the original shuffles.
7021 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
// Pick the non-zero operand of each shuffle as the new shuffle inputs.
7025 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7026 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7028 SDValue LegalShuffle =
7029 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7032 return LegalShuffle;
7038 // fold (or x, 0) -> x
7039 if (isNullConstant(N1))
7042 // fold (or x, -1) -> -1
7043 if (isAllOnesConstant(N1))
7046 if (SDValue NewSel = foldBinOpIntoSelect(N))
7049 // fold (or x, c) -> c iff (x & ~c) == 0
7050 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7051 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7054 if (SDValue Combined = visitORLike(N0, N1, N))
7057 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7060 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7061 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7063 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
// Reassociate (or (or a, b), c) style trees to expose further folds.
7067 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7070 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7071 // iff (c1 & c2) != 0 or c1/c2 are undef.
7072 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7073 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7075 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7076 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7077 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7078 {N1, N0.getOperand(1)})) {
7079 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7080 AddToWorklist(IOR.getNode());
7081 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
// Commutative patterns: try both operand orders.
7085 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7087 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7090 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7091 if (N0.getOpcode() == N1.getOpcode())
7092 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7095 // See if this is some rotate idiom.
7096 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
// Try merging narrow loads OR'ed together into one wide load.
7099 if (SDValue Load = MatchLoadCombine(N))
7102 // Simplify the operands using demanded-bits information.
7103 if (SimplifyDemandedBits(SDValue(N, 0)))
7104 return SDValue(N, 0);
7106 // If OR can be rewritten into ADD, try combines based on ADD.
// Valid when the operands share no set bits, so OR and ADD are equivalent.
7107 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7108 DAG.haveNoCommonBitsSet(N0, N1))
7109 if (SDValue Combined = visitADDLike(N))
/// If Op is (and X, C) with C a constant (or constant build-vector), record C
/// in Mask and return X; otherwise Op is returned unchanged. Used by the
/// rotate matcher to peel an optional AND mask off a shift.
7115 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
7116 if (Op.getOpcode() == ISD::AND &&
7117 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7118 Mask = Op.getOperand(1);
7119 return Op.getOperand(0);
7124 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On a match, Shift receives the shift node and Mask the optional AND mask.
7125 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
// Peel an optional constant mask first, then look for the shift underneath.
7127 Op = stripConstantMask(DAG, Op, Mask);
7128 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7135 /// Helper function for visitOR to extract the needed side of a rotate idiom
7136 /// from a shl/srl/mul/udiv. This is meant to handle cases where
7137 /// InstCombine merged some outside op with one of the shifts from
7138 /// the rotate pattern.
7139 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7140 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
7143 /// (or (add v v) (shrl v bitwidth-1)):
7144 /// expands (add v v) -> (shl v 1)
7146 /// (or (mul v c0) (shrl (mul v c1) c2)):
7147 /// expands (mul v c0) -> (shl (mul v c1) c3)
7149 /// (or (udiv v c0) (shl (udiv v c1) c2)):
7150 /// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7152 /// (or (shl v c0) (shrl (shl v c1) c2)):
7153 /// expands (shl v c0) -> (shl (shl v c1) c3)
7155 /// (or (shrl v c0) (shl (shrl v c1) c2)):
7156 /// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7158 /// Such that in all cases, c3+c2==bitwidth(op v c1).
7159 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7160 SDValue ExtractFrom, SDValue &Mask,
7162 assert(OppShift && ExtractFrom && "Empty SDValue");
7164 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
7165 "Existing shift must be valid as a rotate half");
// Peel an optional constant AND mask off the side we're extracting from.
7167 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7169 // Value and Type of the shift.
7170 SDValue OppShiftLHS = OppShift.getOperand(0);
7171 EVT ShiftedVT = OppShiftLHS.getValueType();
7173 // Amount of the existing shift.
7174 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7176 // (add v v) -> (shl v 1)
7177 // TODO: Should this be a general DAG canonicalization?
// Special case: x+x is the shl-by-1 half of a rotate-by-1 against
// (srl x, bitwidth-1).
7178 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7179 ExtractFrom.getOpcode() == ISD::ADD &&
7180 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7181 ExtractFrom.getOperand(0) == OppShiftLHS &&
7182 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7183 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7184 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7187 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7189 // Find opcode of the needed shift to be extracted from (op0 v c0).
7190 unsigned Opcode = ISD::DELETED_NODE;
7191 bool IsMulOrDiv = false;
7192 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7193 // opcode or its arithmetic (mul or udiv) variant.
7194 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7195 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7196 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7198 Opcode = NeededShift;
7201 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7202 // that the needed shift can be extracted from.
// The needed shift is the opposite direction of the existing one:
// srl on one side requires shl (or mul) on the other, and vice versa.
7203 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7204 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7207 // op0 must be the same opcode on both sides, have the same LHS argument,
7208 // and produce the same value type.
7209 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7210 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7211 ShiftedVT != ExtractFrom.getValueType())
7214 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7215 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7216 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7217 ConstantSDNode *ExtractFromCst =
7218 isConstOrConstSplat(ExtractFrom.getOperand(1));
7219 // TODO: We should be able to handle non-uniform constant vectors for these values
7220 // Check that we have constant values.
7221 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7222 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7223 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7226 // Compute the shift amount we need to extract to complete the rotate.
7227 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7228 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
// c3 = bitwidth - c2; the two shift amounts of a rotate must sum to bitwidth.
7230 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7231 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
7232 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
7233 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
7234 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
7236 // Now try extract the needed shift from the ExtractFrom op and see if the
7237 // result matches up with the existing shift's LHS op.
7239 // Op to extract from is a mul or udiv by a constant.
7241 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
7242 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
7243 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
7244 NeededShiftAmt.getZExtValue());
// The division must be exact, i.e. the extracted shift explains the whole
// difference between the two constants.
7247 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
7248 if (Rem != 0 || ResultAmt != OppLHSAmt)
7251 // Op to extract from is a shift by a constant.
7253 // c2 - (bitwidth(op0 v c0) - c1) == c0
7254 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
7255 ExtractFromAmt.getBitWidth()))
7259 // Return the expanded shift op that should allow a rotate to be formed.
7260 EVT ShiftVT = OppShift.getOperand(1).getValueType();
7261 EVT ResVT = ExtractFrom.getValueType();
7262 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
7263 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
7266 // Return true if we can prove that, whenever Neg and Pos are both in the
7267 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
7268 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
7270 // (or (shift1 X, Neg), (shift2 X, Pos))
7272 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
7273 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
7274 // to consider shift amounts with defined behavior.
7276 // The IsRotate flag should be set when the LHS of both shifts is the same.
7277 // Otherwise if matching a general funnel shift, it should be clear.
7278 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
7279 SelectionDAG &DAG, bool IsRotate) {
7280 const auto &TLI = DAG.getTargetLoweringInfo();
7281 // If EltSize is a power of 2 then:
7283 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
7284 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
7286 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
7287 // for the stronger condition:
7289 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
7291 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
7292 // we can just replace Neg with Neg' for the rest of the function.
7294 // In other cases we check for the even stronger condition:
7296 // Neg == EltSize - Pos [B]
7298 // for all Neg and Pos. Note that the (or ...) then invokes undefined
7299 // behavior if Pos == 0 (and consequently Neg == EltSize).
7301 // We could actually use [A] whenever EltSize is a power of 2, but the
7302 // only extra cases that it would match are those uninteresting ones
7303 // where Neg and Pos are never in range at the same time. E.g. for
7304 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7305 // as well as (sub 32, Pos), but:
7307 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7309 // always invokes undefined behavior for 32-bit X.
7311 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7312 // This allows us to peek through any operations that only affect Mask's
7313 // un-demanded bits.
7315 // NOTE: We can only do this when matching operations which won't modify the
7316 // least Log2(EltSize) significant bits and not a general funnel shift.
7317 unsigned MaskLoBits = 0;
7318 if (IsRotate && isPowerOf2_64(EltSize)) {
7319 unsigned Bits = Log2_64(EltSize);
7320 unsigned NegBits = Neg.getScalarValueSizeInBits();
7321 if (NegBits >= Bits) {
7322 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
// Peek through ops on Neg that only affect bits above the mask.
7324 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
7331 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
7332 if (Neg.getOpcode() != ISD::SUB)
7334 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
7337 SDValue NegOp1 = Neg.getOperand(1);
7339 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
7340 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
7341 // are redundant for the purpose of the equality.
7343 unsigned PosBits = Pos.getScalarValueSizeInBits();
7344 if (PosBits >= MaskLoBits) {
7345 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
7347 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
7353 // The condition we need is now:
7355 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7357 // If NegOp1 == Pos then we need:
7359 // EltSize & Mask == NegC & Mask
7361 // (because "x & Mask" is a truncation and distributes through subtraction).
7363 // We also need to account for a potential truncation of NegOp1 if the amount
7364 // has already been legalized to a shift amount type.
// Width accumulates the value that must equal EltSize (mod Mask) for a match.
7366 if ((Pos == NegOp1) ||
7367 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7368 Width = NegC->getAPIntValue();
7370 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7371 // Then the condition we want to prove becomes:
7373 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7375 // which, again because "x & Mask" is a truncation, becomes:
7377 // NegC & Mask == (EltSize - PosC) & Mask
7378 // EltSize & Mask == (NegC + PosC) & Mask
7379 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7380 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
7381 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7387 // Now we just need to check that EltSize & Mask == Width & Mask.
7389 // EltSize & Mask is 0 since Mask is EltSize - 1.
7390 return Width.getLoBits(MaskLoBits) == 0;
7391 return Width == EltSize;
7394 // A subroutine of MatchRotate used once we have found an OR of two opposite
7395 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7396 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7397 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7398 // Neg with outer conversions stripped away.
// HasPos indicates whether the target supports PosOpcode; the emitted node
// uses PosOpcode/Pos when it does and NegOpcode/Neg otherwise.
7399 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7400 SDValue Neg, SDValue InnerPos,
7401 SDValue InnerNeg, bool HasPos,
7402 unsigned PosOpcode, unsigned NegOpcode,
7404 // fold (or (shl x, (*ext y)),
7405 // (srl x, (*ext (sub 32, y)))) ->
7406 // (rotl x, y) or (rotr x, (sub 32, y))
7408 // fold (or (shl x, (*ext (sub 32, y))),
7409 // (srl x, (*ext y))) ->
7410 // (rotr x, y) or (rotl x, (sub 32, y))
7411 EVT VT = Shifted.getValueType();
// matchRotateSub proves Neg == EltSize - Pos over the defined shift range;
// IsRotate is true here because both shifts operate on the same value.
7412 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7413 /*IsRotate*/ true)) {
7414 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7415 HasPos ? Pos : Neg);
7421 // A subroutine of MatchRotate used once we have found an OR of two opposite
7422 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7423 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7424 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7425 // Neg with outer conversions stripped away.
7426 // TODO: Merge with MatchRotatePosNeg.
7427 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7428 SDValue Neg, SDValue InnerPos,
7429 SDValue InnerNeg, bool HasPos,
7430 unsigned PosOpcode, unsigned NegOpcode,
7432 EVT VT = N0.getValueType();
7433 unsigned EltBits = VT.getScalarSizeInBits();
7435 // fold (or (shl x0, (*ext y)),
7436 // (srl x1, (*ext (sub 32, y)))) ->
7437 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7439 // fold (or (shl x0, (*ext (sub 32, y))),
7440 // (srl x1, (*ext y))) ->
7441 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
// IsRotate only when both shift inputs are the same value; a true funnel
// shift (N0 != N1) needs the stronger sub-pattern proof in matchRotateSub.
7442 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7443 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7444 HasPos ? Pos : Neg);
7447 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
7448 // so for now just use the PosOpcode case if its legal.
7449 // TODO: When can we use the NegOpcode case?
7450 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
// Helper: is Op (BinOpc X, Imm) with a constant (or splat) immediate?
7451 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7452 if (Op.getOpcode() != BinOpc)
7454 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7455 return Cst && (Cst->getAPIntValue() == Imm);
// The xor-with-bitwidth-1 idiom: (y ^ 31) plus a pre-shift by 1 equals the
// (bitwidth - y) complement shift without the y == 0 edge case.
7458 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7459 // -> (fshl x0, x1, y)
7460 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7461 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7462 InnerPos == InnerNeg.getOperand(0) &&
7463 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7464 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7467 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7468 // -> (fshr x0, x1, y)
7469 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7470 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7471 InnerNeg == InnerPos.getOperand(0) &&
7472 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7473 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7476 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7477 // -> (fshr x0, x1, y)
7478 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7479 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7480 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7481 InnerNeg == InnerPos.getOperand(0) &&
7482 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7483 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7490 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
7491 // idioms for rotate, and if the target supports rotation instructions, generate
7492 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7493 // with different shifted sources.
7494 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7495 EVT VT = LHS.getValueType();
7497 // The target must have at least one rotate/funnel flavor.
7498 // We still try to match rotate by constant pre-legalization.
7499 // TODO: Support pre-legalization funnel-shift by constant.
7500 bool HasROTL = hasOperation(ISD::ROTL, VT);
7501 bool HasROTR = hasOperation(ISD::ROTR, VT);
7502 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7503 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7505 // If the type is going to be promoted and the target has enabled custom
7506 // lowering for rotate, allow matching rotate by non-constants. Only allow
7507 // this for scalar types.
7508 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
7509 TargetLowering::TypePromoteInteger) {
7510 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
7511 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
7514 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7517 // Check for truncated rotate.
// Recurse on the wider pre-truncate operands, then truncate the result.
7518 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7519 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7520 assert(LHS.getValueType() == RHS.getValueType());
7521 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7522 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7526 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7527 SDValue LHSShift; // The shift.
7528 SDValue LHSMask; // AND value if any.
7529 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7531 SDValue RHSShift; // The shift.
7532 SDValue RHSMask; // AND value if any.
7533 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7535 // If neither side matched a rotate half, bail
7536 if (!LHSShift && !RHSShift)
7539 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7540 // side of the rotate, so try to handle that here. In all cases we need to
7541 // pass the matched shift from the opposite side to compute the opcode and
7542 // needed shift amount to extract. We still want to do this if both sides
7543 // matched a rotate half because one half may be a potential overshift that
7544 // can be broken down (ie if InstCombine merged two shl or srl ops into a
7547 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7549 if (SDValue NewRHSShift =
7550 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7551 RHSShift = NewRHSShift;
7552 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7554 if (SDValue NewLHSShift =
7555 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7556 LHSShift = NewLHSShift;
7558 // If a side is still missing, nothing else we can do.
7559 if (!RHSShift || !LHSShift)
7562 // At this point we've matched or extracted a shift op on each side.
7564 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7565 return SDValue(); // Shifts must disagree.
7567 // Canonicalize shl to left side in a shl/srl pair.
7568 if (RHSShift.getOpcode() == ISD::SHL) {
7569 std::swap(LHS, RHS);
7570 std::swap(LHSShift, RHSShift);
7571 std::swap(LHSMask, RHSMask);
7574 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7575 SDValue LHSShiftArg = LHSShift.getOperand(0);
7576 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7577 SDValue RHSShiftArg = RHSShift.getOperand(0);
7578 SDValue RHSShiftAmt = RHSShift.getOperand(1);
// A rotate/funnel by constant requires the two shift amounts to cover the
// element exactly: C1 + C2 == bitwidth.
7580 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7581 ConstantSDNode *RHS) {
7582 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
// Reapply any AND masks matched on the shifted operands so the combined
// result keeps only the originally-demanded bits.
7585 auto ApplyMasks = [&](SDValue Res) {
7586 // If there is an AND of either shifted operand, apply it to the result.
7587 if (LHSMask.getNode() || RHSMask.getNode()) {
7588 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7589 SDValue Mask = AllOnes;
7591 if (LHSMask.getNode()) {
7592 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7593 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7594 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7596 if (RHSMask.getNode()) {
7597 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7598 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7599 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7602 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7608 // TODO: Support pre-legalization funnel-shift by constant.
// Same source on both shifts => rotate; different sources need funnel shift.
7609 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7610 if (!IsRotate && !(HasFSHL || HasFSHR)) {
7611 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
7612 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7613 // Look for a disguised rotate by constant.
7614 // The common shifted operand X may be hidden inside another 'or'.
7616 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
7617 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
7619 if (CommonOp == Or.getOperand(0)) {
7621 Y = Or.getOperand(1);
7624 if (CommonOp == Or.getOperand(1)) {
7626 Y = Or.getOperand(0);
7633 if (matchOr(LHSShiftArg, RHSShiftArg)) {
7634 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
7635 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7636 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
7637 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
7638 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
7639 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
7640 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7641 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
7642 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
7647 return ApplyMasks(Res);
7650 return SDValue(); // Requires funnel shift support.
7653 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7654 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7655 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7656 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7657 // iff C1+C2 == EltSizeInBits
7658 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
// Prefer a rotate when both shifts share the source; otherwise emit the
// funnel-shift flavor the target supports (or either, pre-legalization).
7660 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7661 bool UseROTL = !LegalOperations || HasROTL;
7662 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7663 UseROTL ? LHSShiftAmt : RHSShiftAmt);
7665 bool UseFSHL = !LegalOperations || HasFSHL;
7666 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7667 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7670 return ApplyMasks(Res);
7673 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7675 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7678 // If there is a mask here, and we have a variable shift, we can't be sure
7679 // that we're masking out the right stuff.
7680 if (LHSMask.getNode() || RHSMask.getNode())
7683 // If the shift amount is sign/zext/any-extended just peel it off.
7684 SDValue LExtOp0 = LHSShiftAmt;
7685 SDValue RExtOp0 = RHSShiftAmt;
7686 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7687 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7688 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7689 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7690 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7691 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7692 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7693 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7694 LExtOp0 = LHSShiftAmt.getOperand(0);
7695 RExtOp0 = RHSShiftAmt.getOperand(0);
// Variable shift amounts: try pos/neg rotate patterns in both operand
// orders, then the equivalent funnel-shift pos/neg patterns.
7698 if (IsRotate && (HasROTL || HasROTR)) {
7700 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7701 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
7706 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7707 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
7713 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7714 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
7719 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7720 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
7729 /// Represents known origin of an individual byte in load combine pattern. The
7730 /// value of the byte is either constant zero or comes from memory.
7731 struct ByteProvider {
7732 // For constant zero providers Load is set to nullptr. For memory providers
7733 // Load represents the node which loads the byte from memory.
7734 // ByteOffset is the offset of the byte in the value produced by the load.
7735 LoadSDNode *Load = nullptr;
7736 unsigned ByteOffset = 0;
7738 ByteProvider() = default;
// Factory for a byte that comes from memory: byte 'ByteOffset' of 'Load'.
7740 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7741 return ByteProvider(Load, ByteOffset);
// Factory for a byte known to be constant zero.
7744 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7746 bool isConstantZero() const { return !Load; }
7747 bool isMemory() const { return Load; }
7749 bool operator==(const ByteProvider &Other) const {
7750 return Other.Load == Load && Other.ByteOffset == ByteOffset;
// Construct via the getMemory/getConstantZero factories above.
7754 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7755 : Load(Load), ByteOffset(ByteOffset) {}
7758 } // end anonymous namespace
7760 /// Recursively traverses the expression calculating the origin of the requested
7761 /// byte of the given value. Returns None if the provider can't be calculated.
7763 /// For all the values except the root of the expression verifies that the value
7764 /// has exactly one use and if it's not true return None. This way if the origin
7765 /// of the byte is returned it's guaranteed that the values which contribute to
7766 /// the byte are not used outside of this expression.
7768 /// Because the parts of the expression are not allowed to have more than one
7769 /// use this function iterates over trees, not DAGs. So it never visits the same
7770 /// node more than once.
7771 static const Optional<ByteProvider>
7772 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7773 bool Root = false) {
7774 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
// Non-root nodes must be single-use so the whole expression can be replaced
// safely (see the function-level comment above).
7778 if (!Root && !Op.hasOneUse())
7781 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7782 unsigned BitWidth = Op.getValueSizeInBits();
// Only byte-granular values can be decomposed into byte providers.
7783 if (BitWidth % 8 != 0)
7785 unsigned ByteWidth = BitWidth / 8;
7786 assert(Index < ByteWidth && "invalid index requested");
7789 switch (Op.getOpcode()) {
// OR of two providers: the byte is usable only if one side's byte is known
// constant zero; then the other side provides it.
7791 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7794 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7798 if (LHS->isConstantZero())
7800 if (RHS->isConstantZero())
// Shift by constant: only byte-aligned amounts translate into a byte-index
// adjustment.
7805 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7809 uint64_t BitShift = ShiftOp->getZExtValue();
7810 if (BitShift % 8 != 0)
7812 uint64_t ByteShift = BitShift / 8;
// Bytes shifted in are zero; otherwise consult the pre-shift byte.
7814 return Index < ByteShift
7815 ? ByteProvider::getConstantZero()
7816 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7819 case ISD::ANY_EXTEND:
7820 case ISD::SIGN_EXTEND:
7821 case ISD::ZERO_EXTEND: {
7822 SDValue NarrowOp = Op->getOperand(0);
7823 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7824 if (NarrowBitWidth % 8 != 0)
7826 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
// Bytes above the narrow value are known zero only for zero-extension.
7828 if (Index >= NarrowByteWidth)
7829 return Op.getOpcode() == ISD::ZERO_EXTEND
7830 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7832 return calculateByteProvider(NarrowOp, Index, Depth + 1);
// BSWAP mirrors the byte index within the value.
7835 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
// Load: the leaf case - the requested byte comes directly from memory.
7838 auto L = cast<LoadSDNode>(Op.getNode());
7839 if (!L->isSimple() || L->isIndexed())
7842 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7843 if (NarrowBitWidth % 8 != 0)
7845 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
// Bytes beyond the loaded memory width are zero only for ZEXTLOAD.
7847 if (Index >= NarrowByteWidth)
7848 return L->getExtensionType() == ISD::ZEXTLOAD
7849 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7851 return ByteProvider::getMemory(L, Index);
// Memory offset of logical byte i within a BW-byte little-endian value
// (used by isBigEndian below to test offset patterns).
7858 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
// Memory offset of logical byte i within a BW-byte big-endian value
// (used by isBigEndian below to test offset patterns).
7862 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7866 // Check if the bytes offsets we are looking at match with either big or
7867 // little endian value loaded. Return true for big endian, false for little
7868 // endian, and None if match failed.
7869 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7870 int64_t FirstOffset) {
7871 // The endian can be decided only when it is 2 bytes at least.
7872 unsigned Width = ByteOffsets.size();
7876 bool BigEndian = true, LittleEndian = true;
7877 for (unsigned i = 0; i < Width; i++) {
// Compare each offset relative to the lowest (first) byte offset.
7878 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7879 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7880 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
// Neither layout matches: the offsets do not form a contiguous value.
7881 if (!BigEndian && !LittleEndian)
7885 assert((BigEndian != LittleEndian) && "It should be either big endian or"
// Peel any chain of truncate/extend nodes to reach the underlying source
// value, so differently-extended copies of one value compare equal.
7890 static SDValue stripTruncAndExt(SDValue Value) {
7891 switch (Value.getOpcode()) {
7893 case ISD::ZERO_EXTEND:
7894 case ISD::SIGN_EXTEND:
7895 case ISD::ANY_EXTEND:
7896 return stripTruncAndExt(Value.getOperand(0));
7901 /// Match a pattern where a wide type scalar value is stored by several narrow
7902 /// stores. Fold it into a single store or a BSWAP and a store if the targets
7905 /// Assuming little endian target:
7908 /// p[0] = (val >> 0) & 0xFF;
7909 /// p[1] = (val >> 8) & 0xFF;
7910 /// p[2] = (val >> 16) & 0xFF;
7911 /// p[3] = (val >> 24) & 0xFF;
7913 /// *((i32)p) = val;
7917 /// p[0] = (val >> 24) & 0xFF;
7918 /// p[1] = (val >> 16) & 0xFF;
7919 /// p[2] = (val >> 8) & 0xFF;
7920 /// p[3] = (val >> 0) & 0xFF;
7922 /// *((i32)p) = BSWAP(val);
7923 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7924 // The matching looks for "store (trunc x)" patterns that appear early but are
7925 // likely to be replaced by truncating store nodes during combining.
7926 // TODO: If there is evidence that running this later would help, this
7927 // limitation could be removed. Legality checks may need to be added
7928 // for the created store and optional bswap/rotate.
7929 if (LegalOperations || OptLevel == CodeGenOpt::None)
7932 // We only handle merging simple stores of 1-4 bytes.
7933 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7934 EVT MemVT = N->getMemoryVT();
7935 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7936 !N->isSimple() || N->isIndexed())
7939 // Collect all of the stores in the chain.
7940 SDValue Chain = N->getChain();
7941 SmallVector<StoreSDNode *, 8> Stores = {N};
7942 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7943 // All stores must be the same size to ensure that we are writing all of the
7944 // bytes in the wide value.
7945 // TODO: We could allow multiple sizes by tracking each stored byte.
7946 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7949 Stores.push_back(Store);
7950 Chain = Store->getChain();
7952 // There is no reason to continue if we do not have at least a pair of stores.
7953 if (Stores.size() < 2)
7956 // Handle simple types only.
7957 LLVMContext &Context = *DAG.getContext();
7958 unsigned NumStores = Stores.size();
7959 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7960 unsigned WideNumBits = NumStores * NarrowNumBits;
7961 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7962 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7965 // Check if all bytes of the source value that we are looking at are stored
7966 // to the same base address. Collect offsets from Base address into OffsetMap.
7967 SDValue SourceValue;
// OffsetMap[i] records the memory byte offset of the i-th narrow piece of
// the wide value; INT64_MAX marks an unseen slot.
7968 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7969 int64_t FirstOffset = INT64_MAX;
7970 StoreSDNode *FirstStore = nullptr;
7971 Optional<BaseIndexOffset> Base;
7972 for (auto *Store : Stores) {
7973 // All the stores store different parts of the CombinedValue. A truncate is
7974 // required to get the partial value.
7975 SDValue Trunc = Store->getValue();
7976 if (Trunc.getOpcode() != ISD::TRUNCATE)
7978 // Other than the first/last part, a shift operation is required to get the
7981 SDValue WideVal = Trunc.getOperand(0);
7982 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7983 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7984 // The shift amount must be a constant multiple of the narrow type.
7985 // It is translated to the offset address in the wide source value "y".
7987 // x = srl y, ShiftAmtC
7990 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7991 if (ShiftAmtC % NarrowNumBits != 0)
7994 Offset = ShiftAmtC / NarrowNumBits;
7995 WideVal = WideVal.getOperand(0);
7998 // Stores must share the same source value with different offsets.
7999 // Truncate and extends should be stripped to get the single source value.
8001 SourceValue = WideVal;
8002 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8004 else if (SourceValue.getValueType() != WideVT) {
// Prefer the widest seen form of the common source value.
8005 if (WideVal.getValueType() == WideVT ||
8006 WideVal.getScalarValueSizeInBits() >
8007 SourceValue.getScalarValueSizeInBits())
8008 SourceValue = WideVal;
8009 // Give up if the source value type is smaller than the store size.
8010 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8014 // Stores must share the same base address.
8015 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8016 int64_t ByteOffsetFromBase = 0;
8019 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8022 // Remember the first store.
8023 if (ByteOffsetFromBase < FirstOffset) {
8025 FirstOffset = ByteOffsetFromBase;
8027 // Map the offset in the store and the offset in the combined value, and
8028 // early return if it has been set before.
8029 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8031 OffsetMap[Offset] = ByteOffsetFromBase;
8034 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8035 assert(FirstStore && "First store must be set");
8037 // Check that a store of the wide type is both allowed and fast on the target
8038 const DataLayout &Layout = DAG.getDataLayout();
8040 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8041 *FirstStore->getMemOperand(), &Fast);
8042 if (!Allowed || !Fast)
8045 // Check if the pieces of the value are going to the expected places in memory
8046 // to merge the stores.
8047 auto checkOffsets = [&](bool MatchLittleEndian) {
8048 if (MatchLittleEndian) {
8049 for (unsigned i = 0; i != NumStores; ++i)
8050 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8052 } else { // MatchBigEndian by reversing loop counter.
8053 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8054 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8060 // Check if the offsets line up for the native data layout of this target.
8061 bool NeedBswap = false;
8062 bool NeedRotate = false;
8063 if (!checkOffsets(Layout.isLittleEndian())) {
8064 // Special-case: check if byte offsets line up for the opposite endian.
// Byte-sized pieces in reverse order can be fixed with a BSWAP; two
// swapped halves can be fixed with a rotate by half the width.
8065 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8067 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8074 if (WideVT != SourceValue.getValueType()) {
8075 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8076 "Unexpected store value to merge");
8077 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8080 // Before legalize we can introduce illegal bswaps/rotates which will be later
8081 // converted to an explicit bswap sequence. This way we end up with a single
8082 // store and byte shuffling instead of several stores and byte shuffling.
8084 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8085 } else if (NeedRotate) {
8086 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8087 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8088 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8092 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8093 FirstStore->getPointerInfo(), FirstStore->getAlign());
8095 // Rely on other DAG combine rules to remove the other individual stores.
8096 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8100 /// Match a pattern where a wide type scalar value is loaded by several narrow
8101 /// loads and combined by shifts and ors. Fold it into a single load or a load
8102 /// and a BSWAP if the targets supports it.
8104 /// Assuming little endian target:
8106 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8108 /// i32 val = *((i32)a)
8111 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8113 /// i32 val = BSWAP(*((i32)a))
8115 /// TODO: This rule matches complex patterns with OR node roots and doesn't
8116 /// interact well with the worklist mechanism. When a part of the pattern is
8117 /// updated (e.g. one of the loads) its direct users are put into the worklist,
8118 /// but the root node of the pattern which triggers the load combine is not
8119 /// necessarily a direct user of the changed node. For example, once the address
8120 /// of t28 load is reassociated load combine won't be triggered:
8121 /// t25: i32 = add t4, Constant:i32<2>
8122 /// t26: i64 = sign_extend t25
8123 /// t27: i64 = add t2, t26
8124 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8125 /// t29: i32 = zero_extend t28
8126 /// t32: i32 = shl t29, Constant:i8<8>
8127 /// t33: i32 = or t23, t32
8128 /// As a possible fix visitLoad can check if the load can be a part of a load
8129 /// combine pattern and add corresponding OR roots to the worklist.
8130 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8131 assert(N->getOpcode() == ISD::OR &&
8132 "Can only match load combining against OR nodes");
8134 // Handles simple types only
8135 EVT VT = N->getValueType(0);
8136 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8138 unsigned ByteWidth = VT.getSizeInBits() / 8;
8140 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
// Translate a provider's byte index into its offset within the load's
// memory footprint, according to the target's endianness.
8141 auto MemoryByteOffset = [&] (ByteProvider P) {
8142 assert(P.isMemory() && "Must be a memory byte provider");
8143 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
8144 assert(LoadBitWidth % 8 == 0 &&
8145 "can only analyze providers for individual bytes not bit");
8146 unsigned LoadByteWidth = LoadBitWidth / 8;
8147 return IsBigEndianTarget
8148 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
8149 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
8152 Optional<BaseIndexOffset> Base;
8155 SmallPtrSet<LoadSDNode *, 8> Loads;
8156 Optional<ByteProvider> FirstByteProvider;
8157 int64_t FirstOffset = INT64_MAX;
8159 // Check if all the bytes of the OR we are looking at are loaded from the same
8160 // base address. Collect bytes offsets from Base address in ByteOffsets.
8161 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8162 unsigned ZeroExtendedBytes = 0;
// Walk from the most-significant byte down so that a run of constant-zero
// high bytes can be counted for a zero-extending load.
8163 for (int i = ByteWidth - 1; i >= 0; --i) {
8164 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
8168 if (P->isConstantZero()) {
8169 // It's OK for the N most significant bytes to be 0, we can just
8170 // zero-extend the load.
8171 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
8175 assert(P->isMemory() && "provenance should either be memory or zero");
8177 LoadSDNode *L = P->Load;
8178 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
8180 "Must be enforced by calculateByteProvider");
8181 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
8183 // All loads must share the same chain
8184 SDValue LChain = L->getChain();
8187 else if (Chain != LChain)
8190 // Loads must share the same base address
8191 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
8192 int64_t ByteOffsetFromBase = 0;
8195 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8198 // Calculate the offset of the current byte from the base address
8199 ByteOffsetFromBase += MemoryByteOffset(*P);
8200 ByteOffsets[i] = ByteOffsetFromBase;
8202 // Remember the first byte load
8203 if (ByteOffsetFromBase < FirstOffset) {
8204 FirstByteProvider = P;
8205 FirstOffset = ByteOffsetFromBase;
8210 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
8211 "memory, so there must be at least one load which produces the value");
8212 assert(Base && "Base address of the accessed memory location must be set");
8213 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8215 bool NeedsZext = ZeroExtendedBytes > 0;
// With zero high bytes we only load the low (ByteWidth - ZeroExtendedBytes)
// bytes and zero-extend the result.
8218 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
8220 if (!MemVT.isSimple())
8223 // Before legalize we can introduce too wide illegal loads which will be later
8224 // split into legal sized loads. This enables us to combine i64 load by i8
8225 // patterns to a couple of i32 loads on 32 bit targets.
8226 if (LegalOperations &&
8227 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
8231 // Check if the bytes of the OR we are looking at match with either big or
8232 // little endian value load
8233 Optional<bool> IsBigEndian = isBigEndian(
8234 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
8238 assert(FirstByteProvider && "must be set");
8240 // Ensure that the first byte is loaded from zero offset of the first load.
8241 // So the combined value can be loaded from the first load address.
8242 if (MemoryByteOffset(*FirstByteProvider) != 0)
8244 LoadSDNode *FirstLoad = FirstByteProvider->Load;
8246 // The node we are looking at matches with the pattern, check if we can
8247 // replace it with a single (possibly zero-extended) load and bswap + shift if
8250 // If the load needs byte swap check if the target supports it
8251 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
8253 // Before legalize we can introduce illegal bswaps which will be later
8254 // converted to an explicit bswap sequence. This way we end up with a single
8255 // load and byte shuffling instead of several loads and byte shuffling.
8256 // We do not introduce illegal bswaps when zero-extending as this tends to
8257 // introduce too many arithmetic instructions.
8258 if (NeedsBswap && (LegalOperations || NeedsZext) &&
8259 !TLI.isOperationLegal(ISD::BSWAP, VT))
8262 // If we need to bswap and zero extend, we have to insert a shift. Check that
8264 if (NeedsBswap && NeedsZext && LegalOperations &&
8265 !TLI.isOperationLegal(ISD::SHL, VT))
8268 // Check that a load of the wide type is both allowed and fast on the target
8271 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
8272 *FirstLoad->getMemOperand(), &Fast)
8273 if (!Allowed || !Fast)
8277 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
8278 Chain, FirstLoad->getBasePtr(),
8279 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
8281 // Transfer chain users from old loads to the new load.
8282 for (LoadSDNode *L : Loads)
8283 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
// When byte-swapping a zero-extended load, shift the loaded bytes up first
// so the swap places them in the correct (low) positions.
8288 SDValue ShiftedLoad =
8290 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
8291 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
8292 SDLoc(N), LegalOperations))
8294 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
8297 // If the target has andn, bsl, or a similar bit-select instruction,
8298 // we want to unfold masked merge, with canonical pattern of:
8300 // ((x ^ y) & m) ^ y
8303 // (x & m) | (y & ~m)
8304 // If y is a constant, m is not a 'not', and the 'andn' does not work with
8305 // immediates, we unfold into a different pattern:
8306 // ~(~x & m) & (m | y)
8307 // If x is a constant, m is a 'not', and the 'andn' does not work with
8308 // immediates, we unfold into a different pattern:
8309 // (x | ~m) & ~(~m & ~y)
8310 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
8311 // the very least that breaks andnpd / andnps patterns, and because those
8312 // patterns are simplified in IR and shouldn't be created in the DAG
8313 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
8314 assert(N->getOpcode() == ISD::XOR);
8316 // Don't touch 'not' (i.e. where y = -1).
8317 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
8320 EVT VT = N->getValueType(0);
8322 // There are 3 commutable operators in the pattern,
8323 // so we have to deal with 8 possible variants of the basic pattern.
// Try to match 'And' as ((x ^ y) & m) with the xor at operand XorIdx and
// 'Other' as the outer y; on success X, Y, M are bound.
8325 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
8326 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
8328 SDValue Xor = And.getOperand(XorIdx);
8329 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
8331 SDValue Xor0 = Xor.getOperand(0);
8332 SDValue Xor1 = Xor.getOperand(1);
8333 // Don't touch 'not' (i.e. where y = -1).
8334 if (isAllOnesOrAllOnesSplat(Xor1))
8337 std::swap(Xor0, Xor1);
8342 M = And.getOperand(XorIdx ? 0 : 1);
8346 SDValue N0 = N->getOperand(0);
8347 SDValue N1 = N->getOperand(1);
// Try the and/xor on either side of the outer xor, with the inner xor at
// either operand of the and.
8348 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
8349 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
8352 // Don't do anything if the mask is constant. This should not be reachable.
8353 // InstCombine should have already unfolded this pattern, and DAGCombiner
8354 // probably shouldn't produce it, too.
8355 if (isa<ConstantSDNode>(M.getNode()))
8358 // We can transform if the target has AndNot
8359 if (!TLI.hasAndNot(M))
8364 // If Y is a constant, check that 'andn' works with immediates. Unless M is
8365 // a bitwise not that would already allow ANDN to be used.
8366 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8367 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8368 // If not, we need to do a bit more work to make sure andn is still used.
// Emit the alternate form ~(~x & m) & (m | y) (see function comment).
8369 SDValue NotX = DAG.getNOT(DL, X, VT);
8370 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8371 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8372 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8373 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8376 // If X is a constant and M is a bitwise not, check that 'andn' works with
8378 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8379 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8380 // If not, we need to do a bit more work to make sure andn is still used.
// Emit the alternate form (x | ~m) & ~(~m & ~y) (see function comment).
8381 SDValue NotM = M.getOperand(0);
8382 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8383 SDValue NotY = DAG.getNOT(DL, Y, VT);
8384 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8385 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8386 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
// Canonical unfold: (x & m) | (y & ~m).
8389 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8390 SDValue NotM = DAG.getNOT(DL, M, VT);
8391 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8393 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8396 SDValue DAGCombiner::visitXOR(SDNode *N) {
8397 SDValue N0 = N->getOperand(0);
8398 SDValue N1 = N->getOperand(1);
8399 EVT VT = N0.getValueType();
8402 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8403 if (N0.isUndef() && N1.isUndef())
8404 return DAG.getConstant(0, DL, VT);
8406 // fold (xor x, undef) -> undef
8412 // fold (xor c1, c2) -> c1^c2
8413 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8416 // canonicalize constant to RHS
8417 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8418 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8419 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8422 if (VT.isVector()) {
8423 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8426 // fold (xor x, 0) -> x, vector edition
8427 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8431 // fold (xor x, 0) -> x
8432 if (isNullConstant(N1))
8435 if (SDValue NewSel = foldBinOpIntoSelect(N))
8439 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8442 // look for 'add-like' folds:
8443 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
8444 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8445 isMinSignedConstant(N1))
8446 if (SDValue Combined = visitADDLike(N))
8449 // fold !(x cc y) -> (x !cc y)
8450 unsigned N0Opcode = N0.getOpcode();
8451 SDValue LHS, RHS, CC;
8452 if (TLI.isConstTrueVal(N1) &&
8453 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8454 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8455 LHS.getValueType());
8456 if (!LegalOperations ||
8457 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8460 llvm_unreachable("Unhandled SetCC Equivalent!");
8462 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8463 case ISD::SELECT_CC:
8464 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8465 N0.getOperand(3), NotCC);
8466 case ISD::STRICT_FSETCC:
8467 case ISD::STRICT_FSETCCS: {
8468 if (N0.hasOneUse()) {
8469 // FIXME Can we handle multiple uses? Could we token factor the chain
8470 // results from the new/old setcc?
8472 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8473 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8474 CombineTo(N, SetCC);
8475 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8476 recursivelyDeleteUnusedNodes(N0.getNode());
8477 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8485 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8486 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8487 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8488 SDValue V = N0.getOperand(0);
8490 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8491 DAG.getConstant(1, DL0, V.getValueType()));
8492 AddToWorklist(V.getNode());
8493 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8496 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8497 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8498 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8499 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8500 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8501 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8502 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8503 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8504 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8505 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8508 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8509 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8510 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8511 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8512 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8513 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8514 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8515 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8516 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8517 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8521 // fold (not (neg x)) -> (add X, -1)
8522 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8523 // Y is a constant or the subtract has a single use.
8524 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8525 isNullConstant(N0.getOperand(0))) {
8526 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8527 DAG.getAllOnesConstant(DL, VT));
8530 // fold (not (add X, -1)) -> (neg X)
8531 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8532 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8533 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8537 // fold (xor (and x, y), y) -> (and (not x), y)
8538 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8539 SDValue X = N0.getOperand(0);
8540 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8541 AddToWorklist(NotX.getNode());
8542 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8545 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8546 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8547 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8548 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8549 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8550 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8551 SDValue S0 = S.getOperand(0);
8552 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8553 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8554 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8555 return DAG.getNode(ISD::ABS, DL, VT, S0);
8559 // fold (xor x, x) -> 0
8561 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8563 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8564 // Here is a concrete example of this equivalence:
8566 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8567 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8571 // i16 ~1 == 0b1111111111111110
8572 // i16 rol(~1, 14) == 0b1011111111111111
8574 // Some additional tips to help conceptualize this transform:
8575 // - Try to see the operation as placing a single zero in a value of all ones.
8576 // - There exists no value for x which would allow the result to contain zero.
8577 // - Values of x larger than the bitwidth are undefined and do not require a
8578 // consistent result.
8579 // - Pushing the zero left requires shifting one bits in from the right.
8580 // A rotate left of ~1 is a nice way of achieving the desired result.
8581 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8582 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8583 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8587 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8588 if (N0Opcode == N1.getOpcode())
8589 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8592 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8594 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8597 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8598 if (SDValue MM = unfoldMaskedMerge(N))
8601 // Simplify the expression using non-local knowledge.
8602 if (SimplifyDemandedBits(SDValue(N, 0)))
8603 return SDValue(N, 0);
8605 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8611 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8612 /// shift-by-constant operand with identical opcode, we may be able to convert
8613 /// that into 2 independent shifts followed by the logic op. This is a
8614 /// throughput improvement.
8615 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8616 // Match a one-use bitwise logic op.
8617 SDValue LogicOp = Shift->getOperand(0);
8618 if (!LogicOp.hasOneUse())
8621 unsigned LogicOpcode = LogicOp.getOpcode();
8622 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8623 LogicOpcode != ISD::XOR)
8626 // Find a matching one-use shift by constant.
8627 unsigned ShiftOpcode = Shift->getOpcode();
8628 SDValue C1 = Shift->getOperand(1);
// Callers guarantee the outer shift amount is a constant (or constant
// splat), so this lookup cannot fail (asserted below).
8629 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8630 assert(C1Node && "Expected a shift with constant operand");
8631 const APInt &C1Val = C1Node->getAPIntValue();
// Predicate: does V match (ShiftOpcode X, C0) with a single use? On success,
// ShiftOp receives X and ShiftAmtVal points at C0's APInt value.
8632 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8633 const APInt *&ShiftAmtVal) {
8634 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8637 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8641 // Capture the shifted operand and shift amount value.
8642 ShiftOp = V.getOperand(0);
8643 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8645 // Shift amount types do not have to match their operand type, so check that
8646 // the constants are the same width.
8647 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8650 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8651 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8657 // Logic ops are commutative, so check each operand for a match.
// On a match, X is the operand feeding the inner shift and Y is the logic
// operand that was not shifted.
8660 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8661 Y = LogicOp.getOperand(1);
8662 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8663 Y = LogicOp.getOperand(0);
8667 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8669 EVT VT = Shift->getValueType(0);
8670 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8671 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8672 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8673 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8674 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8677 /// Handle transforms common to the three shifts, when the shift amount is a
8679 /// We are looking for: (shift being one of shl/sra/srl)
8680 /// shift (binop X, C0), C1
8681 /// And want to transform into:
8682 /// binop (shift X, C1), (shift C0, C1)
8683 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8684 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8686 // Do not turn a 'not' into a regular xor.
8687 if (isBitwiseNot(N->getOperand(0)))
8690 // The inner binop must be one-use, since we want to replace it.
8691 SDValue LHS = N->getOperand(0);
8692 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8695 // TODO: This is limited to early combining because it may reveal regressions
8696 // otherwise. But since we just checked a target hook to see if this is
8697 // desirable, that should have filtered out cases where this interferes
8698 // with some other pattern matching.
// First try the shift-of-shifted-logic fold: and/or/xor whose operand is
// another shift with the same opcode as N.
8700 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8703 // We want to pull some binops through shifts, so that we have (and (shift))
8704 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8705 // thing happens with address calculations, so it's important to canonicalize
8707 switch (LHS.getOpcode()) {
// ADD only distributes over a left shift; a right shift of a sum is not the
// sum of the right shifts.
8715 if (N->getOpcode() != ISD::SHL)
8716 return SDValue(); // only shl(add) not sr[al](add).
8720 // We require the RHS of the binop to be a constant and not opaque as well.
8721 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8725 // FIXME: disable this unless the input to the binop is a shift by a constant
8726 // or is copy/select. Enable this in other cases when figure out it's exactly
8728 SDValue BinOpLHSVal = LHS.getOperand(0);
8729 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8730 BinOpLHSVal.getOpcode() == ISD::SRA ||
8731 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8732 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8733 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8734 BinOpLHSVal.getOpcode() == ISD::SELECT;
8736 if (!IsShiftByConstant && !IsCopyOrSelect)
8739 if (IsCopyOrSelect && N->hasOneUse())
8742 // Fold the constants, shifting the binop RHS by the shift amount.
8744 EVT VT = N->getValueType(0);
// NewRHS = (shift C0, C1); constant folding is expected to succeed since
// both operands are non-opaque constants (asserted below).
8745 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8747 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8749 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8751 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
// Distribute a truncate through (and X, C): if both the truncate and the AND
// are single-use and the target considers AND desirable at the narrow type,
// produce (and (trunc X), (trunc C)). Callers use the result as a narrower
// shift/rotate amount.
8754 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8755 assert(N->getOpcode() == ISD::TRUNCATE);
8756 assert(N->getOperand(0).getOpcode() == ISD::AND);
8758 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8759 EVT TruncVT = N->getValueType(0);
8760 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8761 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8762 SDValue N01 = N->getOperand(0).getOperand(1);
// Only fold when the mask is a non-opaque constant (or constant vector).
8763 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8765 SDValue N00 = N->getOperand(0).getOperand(0);
8766 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8767 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8768 AddToWorklist(Trunc00.getNode());
8769 AddToWorklist(Trunc01.getNode());
8770 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8777 SDValue DAGCombiner::visitRotate(SDNode *N) {
8779 SDValue N0 = N->getOperand(0);
8780 SDValue N1 = N->getOperand(1);
8781 EVT VT = N->getValueType(0);
8782 unsigned Bitsize = VT.getScalarSizeInBits();
8784 // fold (rot x, 0) -> x
8785 if (isNullOrNullSplat(N1))
8788 // fold (rot x, c) -> x iff (c % BitSize) == 0
// For a power-of-2 bitsize, (c % Bitsize) == 0 iff the low log2(Bitsize)
// bits of the amount are known zero.
8789 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8790 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8791 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8795 // fold (rot x, c) -> (rot x, c % BitSize)
8796 bool OutOfRange = false;
8797 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8798 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8801 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8802 EVT AmtVT = N1.getValueType();
8803 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8805 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8806 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8809 // rot i16 X, 8 --> bswap X
// Rotating by exactly half the 16-bit width is direction-independent, so
// this applies to both ROTL and ROTR.
8810 auto *RotAmtC = isConstOrConstSplat(N1);
8811 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8812 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8813 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8815 // Simplify the operands using demanded-bits information.
8816 if (SimplifyDemandedBits(SDValue(N, 0)))
8817 return SDValue(N, 0);
8819 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8820 if (N1.getOpcode() == ISD::TRUNCATE &&
8821 N1.getOperand(0).getOpcode() == ISD::AND) {
8822 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8823 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8826 unsigned NextOp = N0.getOpcode();
8828 // fold (rot* (rot* x, c2), c1)
8829 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
8830 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8831 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8832 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8833 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8834 EVT ShiftVT = C1->getValueType(0);
// Same-direction rotates add their amounts; opposite directions subtract.
8835 bool SameSide = (N->getOpcode() == NextOp);
8836 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8837 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
// Reduce both amounts modulo the bitwidth before combining (per the
// formula in the comment above).
8838 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
8840 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
8841 {N0.getOperand(1), BitsizeC});
8843 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8844 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
8845 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8846 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8847 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8855 SDValue DAGCombiner::visitSHL(SDNode *N) {
8856 SDValue N0 = N->getOperand(0);
8857 SDValue N1 = N->getOperand(1);
8858 if (SDValue V = DAG.simplifyShift(N0, N1))
8861 EVT VT = N0.getValueType();
8862 EVT ShiftVT = N1.getValueType();
8863 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8865 // fold (shl c1, c2) -> c1<<c2
8866 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8870 if (VT.isVector()) {
8871 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8874 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8875 // If setcc produces all-one true value then:
8876 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8877 if (N1CV && N1CV->isConstant()) {
8878 if (N0.getOpcode() == ISD::AND) {
8879 SDValue N00 = N0->getOperand(0);
8880 SDValue N01 = N0->getOperand(1);
8881 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8883 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8884 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8885 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8887 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8888 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8894 if (SDValue NewSel = foldBinOpIntoSelect(N))
8897 // if (shl x, c) is known to be zero, return 0
8898 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8899 return DAG.getConstant(0, SDLoc(N), VT);
8901 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8902 if (N1.getOpcode() == ISD::TRUNCATE &&
8903 N1.getOperand(0).getOpcode() == ISD::AND) {
8904 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8905 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8908 if (SimplifyDemandedBits(SDValue(N, 0)))
8909 return SDValue(N, 0);
8911 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8912 if (N0.getOpcode() == ISD::SHL) {
// c1+c2 is computed in a widened APInt (extra overflow bit) so the sum
// comparison against the bitwidth cannot wrap.
8913 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8914 ConstantSDNode *RHS) {
8915 APInt c1 = LHS->getAPIntValue();
8916 APInt c2 = RHS->getAPIntValue();
8917 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8918 return (c1 + c2).uge(OpSizeInBits);
8920 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8921 return DAG.getConstant(0, SDLoc(N), VT);
8923 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8924 ConstantSDNode *RHS) {
8925 APInt c1 = LHS->getAPIntValue();
8926 APInt c2 = RHS->getAPIntValue();
8927 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8928 return (c1 + c2).ult(OpSizeInBits);
8930 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8932 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8933 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8937 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8938 // For this to be valid, the second form must not preserve any of the bits
8939 // that are shifted out by the inner shift in the first form. This means
8940 // the outer shift size must be >= the number of bits added by the ext.
8941 // As a corollary, we don't care what kind of ext it is.
8942 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8943 N0.getOpcode() == ISD::ANY_EXTEND ||
8944 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8945 N0.getOperand(0).getOpcode() == ISD::SHL) {
8946 SDValue N0Op0 = N0.getOperand(0);
8947 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8948 EVT InnerVT = N0Op0.getValueType();
8949 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8951 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8952 ConstantSDNode *RHS) {
8953 APInt c1 = LHS->getAPIntValue();
8954 APInt c2 = RHS->getAPIntValue();
8955 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8956 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8957 (c1 + c2).uge(OpSizeInBits);
8959 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8960 /*AllowUndefs*/ false,
8961 /*AllowTypeMismatch*/ true))
8962 return DAG.getConstant(0, SDLoc(N), VT);
8964 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8965 ConstantSDNode *RHS) {
8966 APInt c1 = LHS->getAPIntValue();
8967 APInt c2 = RHS->getAPIntValue();
8968 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8969 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8970 (c1 + c2).ult(OpSizeInBits);
8972 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8973 /*AllowUndefs*/ false,
8974 /*AllowTypeMismatch*/ true)) {
8976 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8977 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8978 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8979 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8983 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8984 // Only fold this if the inner zext has no other uses to avoid increasing
8985 // the total number of instructions.
8986 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8987 N0.getOperand(0).getOpcode() == ISD::SRL) {
8988 SDValue N0Op0 = N0.getOperand(0);
8989 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8991 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8992 APInt c1 = LHS->getAPIntValue();
8993 APInt c2 = RHS->getAPIntValue();
8994 zeroExtendToMatch(c1, c2);
8995 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8997 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8998 /*AllowUndefs*/ false,
8999 /*AllowTypeMismatch*/ true)) {
9001 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9002 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9003 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9004 AddToWorklist(NewSHL.getNode());
9005 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9009 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
// Shared predicate for the shl-of-right-shift folds below: both amounts
// must be in range and the first must be <= the second.
9010 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9011 ConstantSDNode *RHS) {
9012 const APInt &LHSC = LHS->getAPIntValue();
9013 const APInt &RHSC = RHS->getAPIntValue();
9014 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9015 LHSC.getZExtValue() <= RHSC.getZExtValue();
9020 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9021 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
9022 if (N0->getFlags().hasExact()) {
9023 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9024 /*AllowUndefs*/ false,
9025 /*AllowTypeMismatch*/ true)) {
9026 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9027 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9028 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9030 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9031 /*AllowUndefs*/ false,
9032 /*AllowTypeMismatch*/ true)) {
9033 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9034 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9035 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9039 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9040 // (and (srl x, (sub c1, c2)), MASK)
9041 // Only fold this if the inner shift has no other uses -- if it does,
9042 // folding this will increase the total number of instructions.
9043 if (N0.getOpcode() == ISD::SRL &&
9044 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9045 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9046 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9047 /*AllowUndefs*/ false,
9048 /*AllowTypeMismatch*/ true)) {
9049 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9050 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
// Mask keeps only the bits that survive both shifts.
9051 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9052 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9053 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9054 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9055 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9057 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9058 /*AllowUndefs*/ false,
9059 /*AllowTypeMismatch*/ true)) {
9060 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9061 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9062 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9063 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9064 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9065 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9070 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9071 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9072 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9074 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9075 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9076 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9079 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9080 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9081 // Variant of version done on multiply, except mul by a power of 2 is turned
9083 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9085 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
9086 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
9087 TLI.isDesirableToCommuteWithShift(N, Level)) {
9088 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9089 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
9090 AddToWorklist(Shl0.getNode());
9091 AddToWorklist(Shl1.getNode());
9092 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
9095 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9096 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9097 SDValue N01 = N0.getOperand(1);
9099 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9100 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9103 ConstantSDNode *N1C = isConstOrConstSplat(N1);
// Generic binop-through-shift transforms; skip opaque constants, which must
// not be folded.
9104 if (N1C && !N1C->isOpaque())
9105 if (SDValue NewSHL = visitShiftByConstant(N))
9108 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9109 if (N0.getOpcode() == ISD::VSCALE)
9110 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
9111 const APInt &C0 = N0.getConstantOperandAPInt(0);
9112 const APInt &C1 = NC1->getAPIntValue();
9113 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9116 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9118 if (N0.getOpcode() == ISD::STEP_VECTOR)
9119 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
9120 const APInt &C0 = N0.getConstantOperandAPInt(0);
9121 if (ShlVal.ult(C0.getBitWidth())) {
9122 APInt NewStep = C0 << ShlVal;
9123 return DAG.getStepVector(SDLoc(N), VT, NewStep);
9130 // Transform a right shift of a multiply into a multiply-high.
9132 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
9133 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
9134 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
9135 const TargetLowering &TLI) {
9136 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9137 "SRL or SRA node is required here!");
9139 // Check the shift amount. Proceed with the transformation if the shift
9140 // amount is constant.
9141 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9147 // The operation feeding into the shift must be a multiply.
9148 SDValue ShiftOperand = N->getOperand(0);
9149 if (ShiftOperand.getOpcode() != ISD::MUL)
9152 // Both operands must be equivalent extend nodes.
9153 SDValue LeftOp = ShiftOperand.getOperand(0);
9154 SDValue RightOp = ShiftOperand.getOperand(1);
9156 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9157 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9159 if (!IsSignExt && !IsZeroExt)
9162 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9163 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
// The multiply RHS may be a constant rather than a matching extend: accept
// it when it fits in the narrow type (signed or unsigned fit, depending on
// the LHS extension kind) and truncate it to the narrow type.
9165 SDValue MulhRightOp;
9166 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9167 unsigned ActiveBits = IsSignExt
9168 ? Constant->getAPIntValue().getMinSignedBits()
9169 : Constant->getAPIntValue().getActiveBits();
9170 if (ActiveBits > NarrowVTSize)
9172 MulhRightOp = DAG.getConstant(
9173 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9176 if (LeftOp.getOpcode() != RightOp.getOpcode())
9178 // Check that the two extend nodes are the same type.
9179 if (NarrowVT != RightOp.getOperand(0).getValueType())
9181 MulhRightOp = RightOp.getOperand(0);
9184 EVT WideVT = LeftOp.getValueType();
9185 // Proceed with the transformation if the wide types match.
9186 assert((WideVT == RightOp.getValueType()) &&
9187 "Cannot have a multiply node with two different operand types.");
9189 // Proceed with the transformation if the wide type is twice as large
9190 // as the narrow type.
9191 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9194 // Check the shift amount with the narrow type size.
9195 // Proceed with the transformation if the shift amount is the width
9196 // of the narrow type.
9197 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9198 if (ShiftAmt != NarrowVTSize)
9201 // If the operation feeding into the MUL is a sign extend (sext),
9202 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
9203 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
9205 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
9206 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
9210 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
// Re-extend the narrow mulh result back to the wide type: sign extend for
// SRA, zero extend for SRL.
9211 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
9212 : DAG.getZExtOrTrunc(Result, DL, WideVT));
9215 SDValue DAGCombiner::visitSRA(SDNode *N) {
9216 SDValue N0 = N->getOperand(0);
9217 SDValue N1 = N->getOperand(1);
9218 if (SDValue V = DAG.simplifyShift(N0, N1))
9221 EVT VT = N0.getValueType();
9222 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9224 // fold (sra c1, c2) -> (sra c1, c2)
9225 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
9228 // Arithmetic shifting an all-sign-bit value is a no-op.
9229 // fold (sra 0, x) -> 0
9230 // fold (sra -1, x) -> -1
9231 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
9236 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9239 if (SDValue NewSel = foldBinOpIntoSelect(N))
9242 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
9244 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9245 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
9246 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
9247 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
9249 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
9250 VT.getVectorElementCount());
9251 if (!LegalOperations ||
9252 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
9253 TargetLowering::Legal)
9254 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9255 N0.getOperand(0), DAG.getValueType(ExtVT));
9256 // Even if we can't convert to sext_inreg, we might be able to remove
9257 // this shift pair if the input is already sign extended.
9258 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
9259 return N0.getOperand(0);
9262 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
9263 // clamp (add c1, c2) to max shift.
9264 if (N0.getOpcode() == ISD::SRA) {
9266 EVT ShiftVT = N1.getValueType();
9267 EVT ShiftSVT = ShiftVT.getScalarType();
9268 SmallVector<SDValue, 16> ShiftValues;
9270 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9271 APInt c1 = LHS->getAPIntValue();
9272 APInt c2 = RHS->getAPIntValue();
9273 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9274 APInt Sum = c1 + c2;
9276 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
9277 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
9280 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
9282 if (N1.getOpcode() == ISD::BUILD_VECTOR)
9283 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
9284 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
9285 assert(ShiftValues.size() == 1 &&
9286 "Expected matchBinaryPredicate to return one element for "
9288 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
9290 ShiftValue = ShiftValues[0];
9291 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
9295 // fold (sra (shl X, m), (sub result_size, n))
9296 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
9297 // result_size - n != m.
9298 // If truncate is free for the target sext(shl) is likely to result in better
9300 if (N0.getOpcode() == ISD::SHL && N1C) {
9301 // Get the two constants of the shifts, CN0 = m, CN = n.
9302 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
9304 LLVMContext &Ctx = *DAG.getContext();
9305 // Determine what the truncate's result bitsize and type would be.
9306 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
9309 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9311 // Determine the residual right-shift amount.
9312 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
9314 // If the shift is not a no-op (in which case this should be just a sign
9315 // extend already), the truncated to type is legal, sign_extend is legal
9316 // on that type, and the truncate to that type is both legal and free,
9317 // perform the transform.
9318 if ((ShiftAmt > 0) &&
9319 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
9320 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
9321 TLI.isTruncateFree(VT, TruncVT)) {
9323 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
9324 getShiftAmountTy(N0.getOperand(0).getValueType()));
9325 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
9326 N0.getOperand(0), Amt);
9327 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
9329 return DAG.getNode(ISD::SIGN_EXTEND, DL,
9330 N->getValueType(0), Trunc);
9335 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
9336 // sra (add (shl X, N1C), AddC), N1C -->
9337 // sext (add (trunc X to (width - N1C)), AddC')
9338 // sra (sub AddC, (shl X, N1C)), N1C -->
9339 // sext (sub AddC1',(trunc X to (width - N1C)))
9340 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
9342 bool IsAdd = N0.getOpcode() == ISD::ADD;
9343 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
9344 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
9346 // TODO: AddC does not need to be a splat.
9347 if (ConstantSDNode *AddC =
9348 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
9349 // Determine what the truncate's type would be and ask the target if
9350 // that is a free operation.
9351 LLVMContext &Ctx = *DAG.getContext();
9352 unsigned ShiftAmt = N1C->getZExtValue();
9353 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
9355 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9357 // TODO: The simple type check probably belongs in the default hook
9358 // implementation and/or target-specific overrides (because
9359 // non-simple types likely require masking when legalized), but
9360 // that restriction may conflict with other transforms.
9361 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
9362 TLI.isTruncateFree(VT, TruncVT)) {
9364 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
9366 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
9367 TruncVT.getScalarSizeInBits()),
9371 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
9373 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
9374 return DAG.getSExtOrTrunc(Add, DL, VT);
9380 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9381 if (N1.getOpcode() == ISD::TRUNCATE &&
9382 N1.getOperand(0).getOpcode() == ISD::AND) {
9383 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9384 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9387 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9388 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9389 // if c1 is equal to the number of bits the trunc removes
9390 // TODO - support non-uniform vector shift amounts.
9391 if (N0.getOpcode() == ISD::TRUNCATE &&
9392 (N0.getOperand(0).getOpcode() == ISD::SRL ||
9393 N0.getOperand(0).getOpcode() == ISD::SRA) &&
9394 N0.getOperand(0).hasOneUse() &&
9395 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9396 SDValue N0Op0 = N0.getOperand(0);
9397 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9398 EVT LargeVT = N0Op0.getValueType();
9399 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9400 if (LargeShift->getAPIntValue() == TruncBits) {
9402 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
9403 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
9404 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
9405 DAG.getConstant(TruncBits, DL, LargeShiftVT));
9407 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9408 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9413 // Simplify, based on bits shifted out of the LHS.
9414 if (SimplifyDemandedBits(SDValue(N, 0)))
9415 return SDValue(N, 0);
9417 // If the sign bit is known to be zero, switch this to a SRL.
9418 if (DAG.SignBitIsZero(N0))
9419 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9421 if (N1C && !N1C->isOpaque())
9422 if (SDValue NewSRA = visitShiftByConstant(N))
9425 // Try to transform this shift into a multiply-high if
9426 // it matches the appropriate pattern detected in combineShiftToMULH.
9427 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9430 // Attempt to convert a sra of a load into a narrower sign-extending load.
9431 if (SDValue NarrowLoad = reduceLoadWidth(N))
/// Combine step for ISD::SRL (logical shift right).
/// Tries a sequence of independent folds; the first that succeeds returns the
/// replacement SDValue, otherwise control falls through to the next attempt.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  // fold (srl c1, c2) -> c1 >>u c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
  if (SDValue NewSel = foldBinOpIntoSelect(N))
  // if (srl x, c) is known to be zero, return 0
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
      DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // Out-of-range: the two amounts together shift out every bit, so the
    // result is known zero. The amounts are zero-extended with an extra
    // overflow bit so the addition below cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);
    // In-range: the combined amount is a valid shift, so merge the two
    // shifts into one.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
  // Look through a truncate of an inner srl: the two shifts can sometimes be
  // merged in the wider type and truncated afterwards.
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue InnerShift = N0.getOperand(0);
    // TODO - support non-uniform vector shift amounts.
    if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = InnerShift.getValueType();
      EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
      // In the more general case, we can clear the high bits after the shift:
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
          c1 + c2 < InnerShiftSize) {
        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       InnerShift.getOperand(0), NewShiftAmt);
        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
  // (and (srl x, (sub c2, c1), MASK)
  if (N0.getOpcode() == ISD::SHL &&
      (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    // Both amounts must be in range; the predicate also orders them so the
    // SUB below cannot go negative. Type mismatch between the two shift
    // amounts is tolerated and reconciled with getZExtOrTrunc below.
    auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
                                           ConstantSDNode *RHS) {
      const APInt &LHSC = LHS->getAPIntValue();
      const APInt &RHSC = RHS->getAPIntValue();
      return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
             LHSC.getZExtValue() <= RHSC.getZExtValue();
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
      Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
      SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);
    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       DAG.getConstant(ShiftAmt, DL0,
                                                       getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that were shifted in from the undefined extension.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  // fold operands of srl based on knowledge that the low bits are not
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = reduceLoadWidth(N))
  // Here is a common situation. We want to optimize:
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //   %c = setcc eq %b, 0
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
/// Combine step for ISD::FSHL / ISD::FSHR (funnel shifts).
/// fshl(A, B, C): concatenate A:B, shift left by C, take the high half.
/// fshr(A, B, C): concatenate A:B, shift right by C, take the low half.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();
  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  // Power-of-2 width lets us test "amount % BitWidth == 0" as a mask check.
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;
  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();
    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    unsigned ShAmt = Cst->getZExtValue();
      return IsFSHL ? N0 : N1;
    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));
    // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
    // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
    // TODO - permit LHS EXTLOAD if extensions are shifted out.
    if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
        !DAG.getDataLayout().isBigEndian()) {
      auto *LHS = dyn_cast<LoadSDNode>(N0);
      auto *RHS = dyn_cast<LoadSDNode>(N1);
      if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
          LHS->getAddressSpace() == RHS->getAddressSpace() &&
          (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
          ISD::isNON_EXTLoad(LHS)) {
        if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
              IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
          Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
          // Only form the new (possibly misaligned) load if the target says
          // the access is both allowed and fast enough.
          if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                     RHS->getAddressSpace(), NewAlign,
                                     RHS->getMemOperand()->getFlags(), &Fast) &&
            SDValue NewPtr = DAG.getMemBasePlusOffset(
                RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
            AddToWorklist(NewPtr.getNode());
            SDValue Load = DAG.getLoad(
                VT, DL, RHS->getChain(), NewPtr,
                RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
            // Replace the old load's chain with the new load's chain.
            WorklistRemover DeadNodes(*this);
            DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);
/// Combine step for ISD::SSHLSAT / ISD::USHLSAT (saturating shift left).
/// When the shift provably cannot saturate, it degrades to a plain SHL.
SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
  EVT VT = N0.getValueType();
  // fold (*shlsat c1, c2) -> c1<<c2
      DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
    // fold (sshlsat x, c) -> (shl x, c)
    // Safe when c is less than the number of sign bits: no sign bit is
    // shifted out, so no signed saturation can occur.
    if (N->getOpcode() == ISD::SSHLSAT && N1C &&
        N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
    // fold (ushlsat x, c) -> (shl x, c)
    // Safe when c does not exceed the known leading zero count: no set bit
    // is shifted out, so no unsigned saturation can occur.
    if (N->getOpcode() == ISD::USHLSAT && N1C &&
        N1C->getAPIntValue().ule(
            DAG.computeKnownBits(N0).countMinLeadingZeros()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
// Given an ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates an UABD/SABD (unsigned/signed absolute difference) node.
// Returns an empty SDValue when the pattern or target support is missing.
static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  SDValue AbsOp1 = N->getOperand(0);
  if (AbsOp1.getOpcode() != ISD::SUB)
  Op0 = AbsOp1.getOperand(0);
  Op1 = AbsOp1.getOperand(1);
  unsigned Opc0 = Op0.getOpcode();
  // Check if the operands of the sub are (zero|sign)-extended.
  // Both operands must use the same extension kind; it selects ABDS vs ABDU.
  if (Opc0 != Op1.getOpcode() ||
      (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
  EVT VT = N->getValueType(0);
  EVT VT1 = Op0.getOperand(0).getValueType();
  EVT VT2 = Op1.getOperand(0).getValueType();
  unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
  // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
  // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
  // NOTE: Extensions must be equivalent.
  // Prefer the narrow-type ABD when both pre-extension types match and the
  // target supports it there; the result is zero-extended back to VT.
  if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
    Op0 = Op0.getOperand(0);
    Op1 = Op1.getOperand(0);
    SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
  // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
  // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
  if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
    return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
/// Combine step for ISD::ABS (integer absolute value).
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (abs c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
  // Try to form an absolute-difference (ABD) node instead.
  if (SDValue ABD = combineABSToABD(N, DAG, TLI))
/// Combine step for ISD::BSWAP (byte-order reversal).
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (bswap c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, DL, VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0.getOperand(0);
  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
  // isn't supported, it will be expanded to bswap followed by a manual reversal
  // of bits in each byte. By placing bswaps before bitreverse, we can remove
  // the two bswaps if the bitreverse gets expanded.
  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff x >= bw/2 (i.e. lower half is known zero)
  // The shift amount must also be a multiple of 16 so the half-width value
  // remains byte-aligned, and the half-width bswap must be cheap.
  unsigned BW = VT.getScalarSizeInBits();
  if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() >= (BW / 2) &&
        (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
        TLI.isTruncateFree(VT, HalfVT) &&
        (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
      SDValue Res = N0.getOperand(0);
      if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
        Res = DAG.getNode(ISD::SHL, DL, VT, Res,
                          DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
      Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
      Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
      return DAG.getZExtOrTrunc(Res, DL, VT);
  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
  // inverse-shift-of-bswap:
  // bswap (X u<< C) --> (bswap X) u>> C
  // bswap (X u>> C) --> (bswap X) u<< C
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() % 8 == 0) {
      SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
      unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
      return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
/// Combine step for ISD::BITREVERSE (bit-order reversal).
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (bitreverse c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
/// Combine step for ISD::CTLZ (count leading zeros).
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (ctlz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  // If the value is known never to be zero, switch to the undef version.
  // CTLZ_ZERO_UNDEF has the same result for nonzero inputs but may lower to
  // cheaper code since it need not define a value for zero.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine step for ISD::CTLZ_ZERO_UNDEF (ctlz with undefined zero input).
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (ctlz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine step for ISD::CTTZ (count trailing zeros).
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (cttz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  // If the value is known never to be zero, switch to the undef version.
  // CTTZ_ZERO_UNDEF has the same result for nonzero inputs but may lower to
  // cheaper code since it need not define a value for zero.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine step for ISD::CTTZ_ZERO_UNDEF (cttz with undefined zero input).
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (cttz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine step for ISD::CTPOP (population count).
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // fold (ctpop c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
// Returns true when it is profitable and numerically safe to turn a
// select-of-compare into an FMINNUM/FMAXNUM style node: requires the
// no-signed-zeros FP option, a floating-point type the target wants
// combined, and both operands known never to be NaN.
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
                                         const TargetLowering &TLI) {
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = LHS.getValueType();
  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
         DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
/// Generate Min/Max node from a select-of-compare whose compared values are
/// the same pair of values being selected (in either order). Dispatches on
/// the condition code to pick FMINNUM/FMAXNUM (or their IEEE variants).
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must pick between exactly the compared values.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
  case ISD::SETUGE: {
    // Greater-or-equal style predicates: mirror image of the case above.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
/// i.e. replace the select with an arithmetic-shift-right plus OR/AND.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);
  SDValue C1 = N->getOperand(1);
  SDValue C2 = N->getOperand(2);
  if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
  EVT VT = N->getValueType(0);
  // The setcc operand type must match the select result type so the SRA of
  // the compared value can feed the OR/AND directly.
  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
      VT != Cond.getOperand(0).getValueType())
  // The inverted-condition + commuted-select variants of these patterns are
  // canonicalized to these forms in IR.
  SDValue X = Cond.getOperand(0);
  SDValue CondC = Cond.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
      isAllOnesOrAllOnesSplat(C2)) {
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
/// Fold (select Cond, C1, C2) where both arms are integer constants into
/// extension/arithmetic on the boolean condition (zext/sext/add/shl/xor).
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  if (!VT.isInteger())
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isZero() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
    if (C1->isZero() && C2->isAllOnes()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
    if (C1->isOne() && C2->isZero()) {
      // select Cond, 1, 0 --> zext (Cond)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
    if (C1->isAllOnes() && C2->isZero()) {
      // select Cond, -1, 0 --> sext (Cond)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    // Use a target hook because some targets may prefer to transform in the
    // other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      // For any constants that differ by 1, we can transform the select into
      // an extend plus add.
      const APInt &C1Val = C1->getAPIntValue();
      const APInt &C2Val = C2->getAPIntValue();
      if (C1Val - 1 == C2Val) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      if (C1Val + 1 == C2Val) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
      if (C1Val.isPowerOf2() && C2Val.isZero()) {
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
            DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
    if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isZero() && C2->isOne()) {
      DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
/// Fold a boolean (i1 / i1-vector) select into plain AND/OR logic when one
/// arm is the condition itself or an all-zeros/all-ones constant.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
         "Expected a (v)select");
  SDValue Cond = N->getOperand(0);
  SDValue T = N->getOperand(1), F = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Only applies when the selected values are the same 1-bit type as the
  // condition, so logic ops on the condition are equivalent to the select.
  if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
  // select Cond, Cond, F --> or Cond, F
  // select Cond, 1, F    --> or Cond, F
  if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
  // select Cond, T, Cond --> and Cond, T
  // select Cond, T, 0    --> and Cond, T
  if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
  // select Cond, T, 1 --> or (not Cond), T
  if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
  // select Cond, 0, F --> and (not Cond), F
  if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
// Fold a vselect whose condition is a sign-bit test of Cond0 (a SETCC of the
// form "Cond0 s< 0" / "Cond0 s> -1") into a sign-splat mask built with
// (sra Cond0, BW-1), then combined with the select arms via AND/OR.
// Requires the compare operand type to match the select's result type.
10278 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
10279 SDValue N0 = N->getOperand(0);
10280 SDValue N1 = N->getOperand(1);
10281 SDValue N2 = N->getOperand(2);
10282 EVT VT = N->getValueType(0);
// Only a single-use SETCC condition is handled.
10283 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
10286 SDValue Cond0 = N0.getOperand(0);
10287 SDValue Cond1 = N0.getOperand(1);
10288 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10289 if (VT != Cond0.getValueType())
10292 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
10293 // compare is inverted from that pattern ("Cond0 s> -1").
10294 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
10295 ; // This is the pattern we are looking for.
10296 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
10301 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
10302 if (isNullOrNullSplat(N2)) {
10304 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10305 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10306 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
10309 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
10310 if (isAllOnesOrAllOnesSplat(N1)) {
10312 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10313 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10314 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
10317 // If we have to invert the sign bit mask, only do that transform if the
10318 // target has a bitwise 'and not' instruction (the invert is free).
10319 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
10320 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10321 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
10323 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10324 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10325 SDValue Not = DAG.getNOT(DL, Sra, VT);
10326 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
10329 // TODO: There's another pattern in this family, but it may require
10330 // implementing hasOrNot() to check for profitability:
10331 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
// Combine an ISD::SELECT node. Tries, in order: generic DAG select
// simplification, select-of-constants folds, boolean-select-to-logic folds,
// SimplifySelectOps, i1-condition normalization (and/or of conditions vs.
// nested selects), condition-flip canonicalization, setcc-based folds
// (min/max, unsigned-saturating add), SELECT_CC formation, and finally
// select-of-binops folding. Returns the replacement value or SDValue().
10336 SDValue DAGCombiner::visitSELECT(SDNode *N) {
10337 SDValue N0 = N->getOperand(0);
10338 SDValue N1 = N->getOperand(1);
10339 SDValue N2 = N->getOperand(2);
10340 EVT VT = N->getValueType(0);
10341 EVT VT0 = N0.getValueType();
10343 SDNodeFlags Flags = N->getFlags();
10345 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10348 if (SDValue V = foldSelectOfConstants(N))
10351 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10354 // If we can fold this based on the true/false value, do so.
10355 if (SimplifySelectOps(N, N1, N2))
10356 return SDValue(N, 0); // Don't revisit N.
10358 if (VT0 == MVT::i1) {
10359 // The code in this block deals with the following 2 equivalences:
10360 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
10361 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
10362 // The target can specify its preferred form with the
10363 // shouldNormalizeToSelectSequence() callback. However we always transform
10364 // to the right anyway if we find the inner select exists in the DAG anyway
10365 // and we always transform to the left side if we know that we can further
10366 // optimize the combination of the conditions.
10367 bool normalizeToSequence =
10368 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
10369 // select (and Cond0, Cond1), X, Y
10370 // -> select Cond0, (select Cond1, X, Y), Y
10371 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
10372 SDValue Cond0 = N0->getOperand(0);
10373 SDValue Cond1 = N0->getOperand(1);
10374 SDValue InnerSelect =
10375 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
// Keep the expanded form if the target prefers it, or if the inner select
// already existed in the DAG (use_empty() is false for a pre-existing node).
10376 if (normalizeToSequence || !InnerSelect.use_empty())
10377 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
10378 InnerSelect, N2, Flags);
10379 // Cleanup on failure.
10380 if (InnerSelect.use_empty())
10381 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10383 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
10384 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
10385 SDValue Cond0 = N0->getOperand(0);
10386 SDValue Cond1 = N0->getOperand(1);
10387 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
10388 Cond1, N1, N2, Flags);
10389 if (normalizeToSequence || !InnerSelect.use_empty())
10390 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
10391 InnerSelect, Flags);
10392 // Cleanup on failure.
10393 if (InnerSelect.use_empty())
10394 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10397 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
10398 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
10399 SDValue N1_0 = N1->getOperand(0);
10400 SDValue N1_1 = N1->getOperand(1);
10401 SDValue N1_2 = N1->getOperand(2);
10402 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
10403 // Create the actual and node if we can generate good code for it.
10404 if (!normalizeToSequence) {
10405 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
10406 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
10409 // Otherwise see if we can optimize the "and" to a better pattern.
10410 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
10411 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
10416 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
10417 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
10418 SDValue N2_0 = N2->getOperand(0);
10419 SDValue N2_1 = N2->getOperand(1);
10420 SDValue N2_2 = N2->getOperand(2);
10421 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
10422 // Create the actual or node if we can generate good code for it.
10423 if (!normalizeToSequence) {
10424 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
10425 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
10428 // Otherwise see if we can optimize to a better pattern.
10429 if (SDValue Combined = visitORLike(N0, N2_0, N))
10430 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
10436 // select (not Cond), N1, N2 -> select Cond, N2, N1
10437 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10438 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10439 SelectOp->setFlags(Flags);
10443 // Fold selects based on a setcc into other things, such as min/max/abs.
10444 if (N0.getOpcode() == ISD::SETCC) {
10445 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
10446 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10448 // select (fcmp lt x, y), x, y -> fminnum x, y
10449 // select (fcmp gt x, y), x, y -> fmaxnum x, y
10451 // This is OK if we don't care what happens if either operand is a NaN.
10452 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10453 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
10457 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
10458 // This is conservatively limited to pre-legal-operations to give targets
10459 // a chance to reverse the transform if they want to do that. Also, it is
10460 // unlikely that the pattern would be formed late, so it's probably not
10461 // worth going through the other checks.
10462 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
10463 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
10464 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10465 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10466 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
// The add constant and the compare constant must be bitwise complements.
10467 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10468 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10469 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10471 // The IR equivalent of this transform would have this form:
10473 // %c = icmp ugt %x, ~C
10474 // %r = select %c, -1, %a
10476 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10477 // %u0 = extractvalue %u, 0
10478 // %u1 = extractvalue %u, 1
10479 // %r = select %u1, -1, %u0
10480 SDVTList VTs = DAG.getVTList(VT, VT0);
10481 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10482 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
// Prefer a single SELECT_CC node when the target supports it.
10486 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10487 (!LegalOperations &&
10488 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
10489 // Any flags available in a select/setcc fold will be on the setcc as they
10490 // migrated from fcmp
10491 Flags = N0->getFlags();
10492 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10493 N2, N0.getOperand(2));
10494 SelectNode->setFlags(Flags);
10498 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10502 if (!VT.isVector())
10503 if (SDValue BinOp = foldSelectOfBinops(N))
10509 // This function assumes all the vselect's arguments are CONCAT_VECTOR
10510 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If the low half of the condition build_vector is one uniform value and the
// high half is another, the vselect can be rewritten as a CONCAT_VECTORS that
// picks each half directly from LHS or RHS.
10511 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
10513 SDValue Cond = N->getOperand(0);
10514 SDValue LHS = N->getOperand(1);
10515 SDValue RHS = N->getOperand(2);
10516 EVT VT = N->getValueType(0);
10517 int NumElems = VT.getVectorNumElements();
10518 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10519 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10520 Cond.getOpcode() == ISD::BUILD_VECTOR);
10522 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
10523 // binary ones here.
10524 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10527 // We're sure we have an even number of elements due to the
10528 // concat_vectors we have as arguments to vselect.
10529 // Skip BV elements until we find one that's not an UNDEF
10530 // After we find an UNDEF element, keep looping until we get to half the
10531 // length of the BV and see if all the non-undef nodes are the same.
10532 ConstantSDNode *BottomHalf = nullptr;
10533 for (int i = 0; i < NumElems / 2; ++i) {
10534 if (Cond->getOperand(i)->isUndef())
10537 if (BottomHalf == nullptr)
10538 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
// A second distinct non-undef constant in this half defeats the fold.
10539 else if (Cond->getOperand(i).getNode() != BottomHalf)
10543 // Do the same for the second half of the BuildVector
10544 ConstantSDNode *TopHalf = nullptr;
10545 for (int i = NumElems / 2; i < NumElems; ++i) {
10546 if (Cond->getOperand(i)->isUndef())
10549 if (TopHalf == nullptr)
10550 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10551 else if (Cond->getOperand(i).getNode() != TopHalf)
10555 assert(TopHalf && BottomHalf &&
10556 "One half of the selector was all UNDEFs and the other was all the "
10557 "same value. This should have been addressed before this function.");
// Zero selects the false (RHS) half; non-zero selects the true (LHS) half.
10558 return DAG.getNode(
10559 ISD::CONCAT_VECTORS, DL, VT,
10560 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10561 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
// For a gather/scatter with a null base pointer and an ADD index, try to peel
// a splatted LHS of the add out of the vector index and use its scalar value
// as the uniform base pointer instead. Updates BasePtr/Index in place and
// returns true on success.
// NOTE(review): IndexIsScaled is accepted but not used in the visible code —
// presumably reserved for scaled-index handling; confirm against callers.
10564 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
10565 SelectionDAG &DAG) {
10566 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10569 // Only perform the transformation when existing operands can be reused.
10573 // For now we check only the LHS of the add.
10574 SDValue LHS = Index.getOperand(0);
10575 SDValue SplatVal = DAG.getSplatValue(LHS);
// The splatted scalar must already have pointer-compatible type to serve
// as the new base.
10576 if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
10579 BasePtr = SplatVal;
10580 Index = Index.getOperand(1);
10584 // Fold sext/zext of index into index type.
// Looks through a ZERO_EXTEND (always safe) or a SIGN_EXTEND (only when the
// current index type is signed) around the gather/scatter index, asking the
// target via shouldRemoveExtendFromGSIndex() whether the narrower index is
// acceptable. Updates Index/IndexType in place.
10585 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
10586 SelectionDAG &DAG) {
10587 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10589 // It's always safe to look through zero extends.
10590 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10591 SDValue Op = Index.getOperand(0);
10592 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
// Dropping a zext means the remaining index bits are unsigned.
10593 IndexType = ISD::UNSIGNED_SCALED;
// Even if the extend is kept, a zero-extended index is unsigned, so a
// signed index type can be relaxed to unsigned.
10597 if (ISD::isIndexTypeSigned(IndexType)) {
10598 IndexType = ISD::UNSIGNED_SCALED;
10603 // It's only safe to look through sign extends when Index is signed.
10604 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
10605 ISD::isIndexTypeSigned(IndexType)) {
10606 SDValue Op = Index.getOperand(0);
10607 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
// Combine a masked-scatter node: drop scatters with an all-zero mask, and
// rebuild the node when the base pointer or index can be refined
// (refineUniformBase / refineIndexType).
10616 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10617 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10618 SDValue Mask = MSC->getMask();
10619 SDValue Chain = MSC->getChain();
10620 SDValue Index = MSC->getIndex();
10621 SDValue Scale = MSC->getScale();
10622 SDValue StoreVal = MSC->getValue();
10623 SDValue BasePtr = MSC->getBasePtr();
10624 ISD::MemIndexType IndexType = MSC->getIndexType();
10627 // Zap scatters with a zero mask.
10628 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
// Rebuild with a uniform scalar base peeled out of the index.
10631 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
10632 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10633 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
10634 DL, Ops, MSC->getMemOperand(), IndexType,
10635 MSC->isTruncatingStore());
// Rebuild with a narrower / re-typed index.
10638 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
10639 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10640 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
10641 DL, Ops, MSC->getMemOperand(), IndexType,
10642 MSC->isTruncatingStore());
// Combine a masked-store node: drop stores with an all-zero mask, convert an
// all-ones-mask store to a regular store, try indexed-store formation, shrink
// the stored value of a truncating store via SimplifyDemandedBits, and fold a
// TRUNCATE feeding the store into a masked truncating store.
10648 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10649 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10650 SDValue Mask = MST->getMask();
10651 SDValue Chain = MST->getChain();
10652 SDValue Value = MST->getValue();
10653 SDValue Ptr = MST->getBasePtr();
10656 // Zap masked stores with a zero mask.
10657 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10660 // If this is a masked load with an all ones mask, we can use a unmasked load.
10661 // FIXME: Can we do this for indexed, compressing, or truncating stores?
10662 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10663 !MST->isCompressingStore() && !MST->isTruncatingStore())
10664 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10665 MST->getBasePtr(), MST->getPointerInfo(),
10666 MST->getOriginalAlign(), MachineMemOperand::MOStore,
10669 // Try transforming N to an indexed store.
10670 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10671 return SDValue(N, 0);
// For truncating stores of non-opaque integer values, only the low memory-VT
// bits of Value are actually stored, so the rest can be simplified away.
10673 if (MST->isTruncatingStore() && MST->isUnindexed() &&
10674 Value.getValueType().isInteger() &&
10675 (!isa<ConstantSDNode>(Value) ||
10676 !cast<ConstantSDNode>(Value)->isOpaque())) {
10677 APInt TruncDemandedBits =
10678 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10679 MST->getMemoryVT().getScalarSizeInBits());
10681 // See if we can simplify the operation with
10682 // SimplifyDemandedBits, which only works if the value has a single use.
10683 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10684 // Re-visit the store if anything changed and the store hasn't been merged
10685 // with another node (N is deleted) SimplifyDemandedBits will add Value's
10686 // node back to the worklist if necessary, but we also need to re-visit
10687 // the Store node itself.
10688 if (N->getOpcode() != ISD::DELETED_NODE)
10690 return SDValue(N, 0);
10694 // If this is a TRUNC followed by a masked store, fold this into a masked
10695 // truncating store. We can do this even if this is already a masked
10697 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
10698 MST->isUnindexed() &&
10699 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10700 MST->getMemoryVT(), LegalOperations)) {
// The mask must match the wider (pre-truncate) value type.
10701 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10702 Value.getOperand(0).getValueType());
10703 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10704 MST->getOffset(), Mask, MST->getMemoryVT(),
10705 MST->getMemOperand(), MST->getAddressingMode(),
10706 /*IsTruncating=*/true);
// Combine a masked-gather node: an all-zero mask yields the pass-through
// value, and the node is rebuilt when the base pointer or index can be
// refined (refineUniformBase / refineIndexType).
10712 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10713 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10714 SDValue Mask = MGT->getMask();
10715 SDValue Chain = MGT->getChain();
10716 SDValue Index = MGT->getIndex();
10717 SDValue Scale = MGT->getScale();
10718 SDValue PassThru = MGT->getPassThru();
10719 SDValue BasePtr = MGT->getBasePtr();
10720 ISD::MemIndexType IndexType = MGT->getIndexType();
10723 // Zap gathers with a zero mask.
10724 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10725 return CombineTo(N, PassThru, MGT->getChain());
// Rebuild with a uniform scalar base peeled out of the index.
10727 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
10728 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10729 return DAG.getMaskedGather(
10730 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
10731 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
// Rebuild with a narrower / re-typed index.
10734 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
10735 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10736 return DAG.getMaskedGather(
10737 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
10738 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
// Combine a masked-load node: an all-zero mask yields the pass-through value,
// an all-ones mask (unindexed, non-expanding, non-extending) becomes a plain
// load, and indexed-load formation is attempted.
10744 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10745 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10746 SDValue Mask = MLD->getMask();
10749 // Zap masked loads with a zero mask.
10750 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10751 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10753 // If this is a masked load with an all ones mask, we can use a unmasked load.
10754 // FIXME: Can we do this for indexed, expanding, or extending loads?
10755 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10756 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10757 SDValue NewLd = DAG.getLoad(
10758 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10759 MLD->getPointerInfo(), MLD->getOriginalAlign(),
10760 MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
// Replace both the value result and the chain result of the masked load.
10761 return CombineTo(N, NewLd, NewLd.getValue(1));
10764 // Try transforming N to an indexed load.
10765 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10766 return SDValue(N, 0);
10771 /// A vector select of 2 constant vectors can be simplified to math/logic to
10772 /// avoid a variable select instruction and possibly avoid constant loads.
/// Handled patterns (target must opt in via convertSelectOfConstantsToMath):
/// - arms differ element-wise by exactly +1 / -1: select becomes
///   add (zext/sext Cond), C.
/// - true arm is a power-of-2 splat and false arm is zero: select becomes a
///   shift of the zero-extended condition.
/// - otherwise defer to foldSelectOfConstantsUsingSra.
10773 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10774 SDValue Cond = N->getOperand(0);
10775 SDValue N1 = N->getOperand(1);
10776 SDValue N2 = N->getOperand(2);
10777 EVT VT = N->getValueType(0);
// Require a single-use i1-per-element condition and two all-constant
// build_vector arms.
10778 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10779 !TLI.convertSelectOfConstantsToMath(VT) ||
10780 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10781 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10784 // Check if we can use the condition value to increment/decrement a single
10785 // constant value. This simplifies a select to an add and removes a constant
10786 // load/materialization from the general case.
10787 bool AllAddOne = true;
10788 bool AllSubOne = true;
10789 unsigned Elts = VT.getVectorNumElements();
10790 for (unsigned i = 0; i != Elts; ++i) {
10791 SDValue N1Elt = N1.getOperand(i);
10792 SDValue N2Elt = N2.getOperand(i);
10793 if (N1Elt.isUndef() || N2Elt.isUndef())
10795 if (N1Elt.getValueType() != N2Elt.getValueType())
10798 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10799 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10806 // Further simplifications for the extra-special cases where the constants are
10807 // all 0 or all -1 should be implemented as folds of these patterns.
10809 if (AllAddOne || AllSubOne) {
10810 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10811 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10812 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10813 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10814 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10817 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10819 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10820 isNullOrNullSplat(N2)) {
10821 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10822 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10823 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10826 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10829 // The general case for select-of-constants:
10830 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10831 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10832 // leave that to a machine-specific pass.
// Combine an ISD::VSELECT node. Tries: generic simplification, boolean
// select-to-logic, condition-flip canonicalization, integer abs recognition,
// setcc-based folds (fmin/fmax, FP-to-sat, compare widening, UADDSAT/USUBSAT
// matching), SimplifySelectOps, constant-mask folds, select-to-concat, and
// demanded-elements simplification. Returns the replacement or SDValue().
10836 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10837 SDValue N0 = N->getOperand(0);
10838 SDValue N1 = N->getOperand(1);
10839 SDValue N2 = N->getOperand(2);
10840 EVT VT = N->getValueType(0);
10843 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10846 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10849 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10850 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10851 return DAG.getSelect(DL, VT, F, N2, N1);
10853 // Canonicalize integer abs.
10854 // vselect (setg[te] X, 0), X, -X ->
10855 // vselect (setgt X, -1), X, -X ->
10856 // vselect (setl[te] X, 0), -X, X ->
10857 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10858 if (N0.getOpcode() == ISD::SETCC) {
10859 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10860 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10861 bool isAbs = false;
10862 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// abs pattern: compare selects between X and (sub 0, X).
10864 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10865 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10866 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10867 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10868 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10869 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10870 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
// Prefer a native ABS node; otherwise expand as sra/add/xor.
10873 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10874 return DAG.getNode(ISD::ABS, DL, VT, LHS);
10876 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10877 DAG.getConstant(VT.getScalarSizeInBits() - 1,
10878 DL, getShiftAmountTy(VT)));
10879 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10880 AddToWorklist(Shift.getNode());
10881 AddToWorklist(Add.getNode());
10882 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10885 // vselect x, y (fcmp lt x, y) -> fminnum x, y
10886 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10888 // This is OK if we don't care about what happens if either operand is a
10891 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10892 if (SDValue FMinMax =
10893 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10897 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10899 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10902 // If this select has a condition (setcc) with narrower operands than the
10903 // select, try to widen the compare to match the select width.
10904 // TODO: This should be extended to handle any constant.
10905 // TODO: This could be extended to handle non-loading patterns, but that
10906 // requires thorough testing to avoid regressions.
10907 if (isNullOrNullSplat(RHS)) {
10908 EVT NarrowVT = LHS.getValueType();
10909 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10910 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10911 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10912 unsigned WideWidth = WideVT.getScalarSizeInBits();
10913 bool IsSigned = isSignedIntSetCC(CC);
10914 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10915 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10916 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10917 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10918 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10919 // Both compare operands can be widened for free. The LHS can use an
10920 // extended load, and the RHS is a constant:
10921 // vselect (ext (setcc load(X), C)), N1, N2 -->
10922 // vselect (setcc extload(X), C'), N1, N2
10923 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10924 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10925 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10926 EVT WideSetCCVT = getSetCCResultType(WideVT);
10927 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10928 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10932 // Match VSELECTs into add with unsigned saturation.
10933 if (hasOperation(ISD::UADDSAT, VT)) {
10934 // Check if one of the arms of the VSELECT is vector with all bits set.
10935 // If it's on the left side invert the predicate to simplify logic below.
10937 ISD::CondCode SatCC = CC;
10938 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10940 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10941 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10945 if (Other && Other.getOpcode() == ISD::ADD) {
10946 SDValue CondLHS = LHS, CondRHS = RHS;
10947 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10949 // Canonicalize condition operands.
10950 if (SatCC == ISD::SETUGE) {
10951 std::swap(CondLHS, CondRHS);
10952 SatCC = ISD::SETULE;
10955 // We can test against either of the addition operands.
10956 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10957 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10958 if (SatCC == ISD::SETULE && Other == CondRHS &&
10959 (OpLHS == CondLHS || OpRHS == CondLHS))
10960 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10962 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10963 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10964 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10965 CondLHS == OpLHS) {
10966 // If the RHS is a constant we have to reverse the const
10967 // canonicalization.
10968 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10969 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10970 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10972 if (SatCC == ISD::SETULE &&
10973 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10974 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10979 // Match VSELECTs into sub with unsigned saturation.
10980 if (hasOperation(ISD::USUBSAT, VT)) {
10981 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10982 // the left side invert the predicate to simplify logic below.
10984 ISD::CondCode SatCC = CC;
10985 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10987 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10988 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10992 // zext(x) >= y ? trunc(zext(x) - y) : 0
10993 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
10994 // zext(x) > y ? trunc(zext(x) - y) : 0
10995 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
10996 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
10997 Other.getOperand(0).getOpcode() == ISD::SUB &&
10998 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
10999 SDValue OpLHS = Other.getOperand(0).getOperand(0);
11000 SDValue OpRHS = Other.getOperand(0).getOperand(1);
11001 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
11002 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
11007 if (Other && Other.getNumOperands() == 2) {
11008 SDValue CondRHS = RHS;
11009 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
11011 if (OpLHS == LHS) {
11012 // Look for a general sub with unsigned saturation first.
11013 // x >= y ? x-y : 0 --> usubsat x, y
11014 // x > y ? x-y : 0 --> usubsat x, y
11015 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
11016 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
11017 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11019 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
11020 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
11021 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
11022 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
11023 // If the RHS is a constant we have to reverse the const
11024 // canonicalization.
11025 // x > C-1 ? x+-C : 0 --> usubsat x, C
11026 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
11027 return (!Op && !Cond) ||
11029 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
11031 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
11032 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
11033 /*AllowUndefs*/ true)) {
// Negate the add constant to recover the subtracted constant C.
11034 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
11035 DAG.getConstant(0, DL, VT), OpRHS);
11036 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11039 // Another special case: If C was a sign bit, the sub has been
11040 // canonicalized into a xor.
11041 // FIXME: Would it be better to use computeKnownBits to
11042 // determine whether it's safe to decanonicalize the xor?
11043 // x s< 0 ? x^C : 0 --> usubsat x, C
11045 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
11046 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
11047 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
11048 SplatValue.isSignMask()) {
11049 // Note that we have to rebuild the RHS constant here to
11050 // ensure we don't rely on particular values of undef lanes.
11051 OpRHS = DAG.getConstant(SplatValue, DL, VT);
11052 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11061 if (SimplifySelectOps(N, N1, N2))
11062 return SDValue(N, 0); // Don't revisit N.
11064 // Fold (vselect all_ones, N1, N2) -> N1
11065 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
11067 // Fold (vselect all_zeros, N1, N2) -> N2
11068 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
11071 // The ConvertSelectToConcatVector function is assuming both the above
11072 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
11074 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
11075 N2.getOpcode() == ISD::CONCAT_VECTORS &&
11076 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
11077 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
11081 if (SDValue V = foldVSelectOfConstants(N))
// Sign-bit-splat folds need a legal/custom SRA on this type.
11084 if (hasOperation(ISD::SRA, VT))
11085 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
11088 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11089 return SDValue(N, 0);
// Combine an ISD::SELECT_CC node: fold identical arms, constant-fold the
// embedded comparison via SimplifySetCC (picking an arm when the condition is
// known, propagating undef, or rebuilding a simpler SELECT_CC), simplify the
// true/false operands, and finally defer to SimplifySelectCC.
11094 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
11095 SDValue N0 = N->getOperand(0);
11096 SDValue N1 = N->getOperand(1);
11097 SDValue N2 = N->getOperand(2);
11098 SDValue N3 = N->getOperand(3);
11099 SDValue N4 = N->getOperand(4);
11100 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
11102 // fold select_cc lhs, rhs, x, x, cc -> x
11106 // Determine if the condition we're dealing with is constant
11107 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
11108 CC, SDLoc(N), false)) {
11109 AddToWorklist(SCC.getNode());
11111 // cond always true -> true val
11112 // cond always false -> false val
11113 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
11114 return SCCC->isZero() ? N3 : N2;
11116 // When the condition is UNDEF, just return the first operand. This is
11117 // coherent the DAG creation, no setcc node is created in this case
11118 if (SCC->isUndef())
11121 // Fold to a simpler select_cc
11122 if (SCC.getOpcode() == ISD::SETCC) {
11123 SDValue SelectOp = DAG.getNode(
11124 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
11125 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
// Preserve the fast-math / poison-generating flags of the simplified setcc.
11126 SelectOp->setFlags(SCC->getFlags());
11131 // If we can fold this based on the true/false value, do so.
11132 if (SimplifySelectOps(N, N2, N3))
11133 return SDValue(N, 0); // Don't revisit N.
11135 // fold select_cc into other things, such as min/max/abs
11136 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
// Combine a SETCC node. Includes a freeze-hoisting transform (push FREEZE
// below the compare so BRCOND(FREEZE(X)) can later become BRCOND(X)) and a
// call to SimplifySetCC; if the result is no longer a setcc but one is
// preferred (brcond user), attempt to rebuild one.
11139 SDValue DAGCombiner::visitSETCC(SDNode *N) {
11140 // setcc is very commonly used as an argument to brcond. This pattern
11141 // also lends itself to numerous combines and, as a result, it is desired
11142 // we keep the argument to a brcond as a setcc as much as possible.
11144 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
11146 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
11147 EVT VT = N->getValueType(0);
11149 // SETCC(FREEZE(X), CONST, Cond)
11151 // FREEZE(SETCC(X, CONST, Cond))
11152 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
11153 // isn't equivalent to true or false.
11154 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
11155 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
11157 // This transformation is beneficial because visitBRCOND can fold
11158 // BRCOND(FREEZE(X)) to BRCOND(X).
11160 // Conservatively optimize integer comparisons only.
11162 // Do this only when SETCC is going to be used by BRCOND.
11164 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
11165 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
11166 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
11167 bool Updated = false;
11169 // Is 'X Cond C' always true or false?
11170 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
// Degenerate comparisons against the extreme value for the condition code:
// e.g. unsigned < 0 is always false; unsigned >= 0 is always true.
11171 bool False = (Cond == ISD::SETULT && C->isZero()) ||
11172 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
11173 (Cond == ISD::SETUGT && C->isAllOnes()) ||
11174 (Cond == ISD::SETGT && C->isMaxSignedValue());
11175 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
11176 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
11177 (Cond == ISD::SETUGE && C->isZero()) ||
11178 (Cond == ISD::SETGE && C->isMinSignedValue());
11179 return True || False;
// Look through FREEZE on either operand when the compare cannot be constant
// folded to true/false (which would be unsound for a poison input).
11182 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
11183 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
11184 N0 = N0->getOperand(0);
11188 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
// Swap the condition so the constant-vs-extreme check applies to N0.
11189 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
11191 N1 = N1->getOperand(0);
11197 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
11200 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
11201 SDLoc(N), !PreferSetCC);
11206 // If we prefer to have a setcc, and we don't, we'll try our best to
11207 // recreate one using rebuildSetCC.
11208 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
11209 SDValue NewSetCC = rebuildSetCC(Combined);
11211 // We don't have anything interesting to combine to.
11212 if (NewSetCC.getNode() == N)
// Combine a SETCCCARRY node (compare with carry-in).
// When the carry-in is provably zero, the carry contributes nothing and the
// node degenerates to an ordinary SETCC on the same operands.
11222 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
11223 SDValue LHS = N->getOperand(0);
11224 SDValue RHS = N->getOperand(1);
11225 SDValue Carry = N->getOperand(2);
11226 SDValue Cond = N->getOperand(3);
11228 // If Carry is false, fold to a regular SETCC.
11229 if (isNullConstant(Carry))
11230 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
11235 /// Check if N satisfies:
11236 ///   N is used once.
11238 ///   The load is compatible with ExtOpcode. It means
11239 ///   If load has explicit zero/sign extension, ExtOpcode must have the same
11241 ///   Otherwise returns true.
11242 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
11243 if (!N.hasOneUse())
11246 if (!isa<LoadSDNode>(N))
11249 LoadSDNode *Load = cast<LoadSDNode>(N);
11250 ISD::LoadExtType LoadExt = Load->getExtensionType();
// Non-extending and any-extending loads are compatible with every ext opcode.
11251 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
11254 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
11256 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
11257 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
11264 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
11265 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
11266 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
11267 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11268 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11269 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
11270 SelectionDAG &DAG) {
11271 unsigned Opcode = N->getOpcode();
11272 SDValue N0 = N->getOperand(0);
11273 EVT VT = N->getValueType(0);
11276 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11277 Opcode == ISD::ANY_EXTEND) &&
11278 "Expected EXTEND dag node in input!");
// Only select/vselect of two single-use loads (whose extension kind, if any,
// matches Opcode) are candidates; see isCompatibleLoad above.
11280 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
11284 SDValue Op1 = N0->getOperand(1);
11285 SDValue Op2 = N0->getOperand(2);
11286 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
// Map the extend opcode to the corresponding ext-load type
// (aext -> EXTLOAD by default).
11289 auto ExtLoadOpcode = ISD::EXTLOAD;
11290 if (Opcode == ISD::SIGN_EXTEND)
11291 ExtLoadOpcode = ISD::SEXTLOAD;
11292 else if (Opcode == ISD::ZERO_EXTEND)
11293 ExtLoadOpcode = ISD::ZEXTLOAD;
// Both widened loads must be legal for the target or we would trade one
// extend for two illegal loads.
11295 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
11296 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
11297 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
11298 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
11301 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
11302 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
11303 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
11306 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
11307 /// a build_vector of constants.
11308 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11309 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11310 /// Vector extends are not folded if operations are legal; this is to
11311 /// avoid introducing illegal build_vector dag nodes.
11312 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
11313 SelectionDAG &DAG, bool LegalTypes) {
11314 unsigned Opcode = N->getOpcode();
11315 SDValue N0 = N->getOperand(0);
11316 EVT VT = N->getValueType(0);
11319 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11320 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
11321 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
11322 && "Expected EXTEND dag node in input!");
11324 // fold (sext c1) -> c1
11325 // fold (zext c1) -> c1
11326 // fold (aext c1) -> c1
// getNode constant-folds an extend of a ConstantSDNode directly.
11327 if (isa<ConstantSDNode>(N0))
11328 return DAG.getNode(Opcode, DL, VT, N0);
11330 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11331 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
11332 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11333 if (N0->getOpcode() == ISD::SELECT) {
11334 SDValue Op1 = N0->getOperand(1);
11335 SDValue Op2 = N0->getOperand(2);
11336 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
11337 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
11338 // For any_extend, choose sign extension of the constants to allow a
11339 // possible further transform to sign_extend_inreg.i.e.
11341 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
11342 // t2: i64 = any_extend t1
11344 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
11346 // t4: i64 = sign_extend_inreg t3
11347 unsigned FoldOpc = Opcode;
11348 if (FoldOpc == ISD::ANY_EXTEND)
11349 FoldOpc = ISD::SIGN_EXTEND;
11350 return DAG.getSelect(DL, VT, N0->getOperand(0),
11351 DAG.getNode(FoldOpc, DL, VT, Op1),
11352 DAG.getNode(FoldOpc, DL, VT, Op2));
11356 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
11357 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
11358 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
11359 EVT SVT = VT.getScalarType();
11360 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
11361 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
11364 // We can fold this node into a build_vector.
11365 unsigned VTBits = SVT.getSizeInBits();
11366 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
11367 SmallVector<SDValue, 8> Elts;
11368 unsigned NumElts = VT.getVectorNumElements();
11370 // For zero-extensions, UNDEF elements still guarantee to have the upper
11371 // bits set to zero.
11373 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
11375 for (unsigned i = 0; i != NumElts; ++i) {
11376 SDValue Op = N0.getOperand(i);
11377 if (Op.isUndef()) {
11378 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
11383 // Get the constant value and if needed trunc it to the size of the type.
11384 // Nodes like build_vector might have constants wider than the scalar type.
11385 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
11386 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
11387 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
11389 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
11392 return DAG.getBuildVector(VT, DL, Elts);
11395 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
11396 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
11397 // transformation. Returns true if extension are possible and the above
11398 // mentioned transformation is profitable.
// On success, SetCC users that must themselves be widened are collected into
// ExtendNodes for the caller to rewrite via ExtendSetCCUses.
11399 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
11401 SmallVectorImpl<SDNode *> &ExtendNodes,
11402 const TargetLowering &TLI) {
11403 bool HasCopyToRegUses = false;
11404 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
// Walk every use of the load value and classify it as extendable or not.
11405 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
11407 SDNode *User = *UI;
// Only uses of the load's value result matter, not its chain result.
11410 if (UI.getUse().getResNo() != N0.getResNo())
11412 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
11413 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
11414 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
11415 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
11416 // Sign bits will be lost after a zext.
11419 for (unsigned i = 0; i != 2; ++i) {
11420 SDValue UseOp = User->getOperand(i);
11423 if (!isa<ConstantSDNode>(UseOp))
11428 ExtendNodes.push_back(User);
11431 // If truncates aren't free and there are users we can't
11432 // extend, it isn't worthwhile.
11435 // Remember if this value is live-out.
11436 if (User->getOpcode() == ISD::CopyToReg)
11437 HasCopyToRegUses = true;
11440 if (HasCopyToRegUses) {
11441 bool BothLiveOut = false;
11442 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
11444 SDUse &Use = UI.getUse();
11445 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
11446 BothLiveOut = true;
11451 // Both unextended and extended values are live out. There had better be
11452 // a good reason for the transformation.
11453 return ExtendNodes.size();
// Rewrite the SetCC users collected by ExtendUsesToFormExtLoad: each compare
// of the original (narrow) load is replaced with a compare of the widened
// load, with its other operand extended via ExtType to match.
11458 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
11459 SDValue OrigLoad, SDValue ExtLoad,
11460 ISD::NodeType ExtType) {
11461 // Extend SetCC uses if necessary.
11463 for (SDNode *SetCC : SetCCs) {
11464 SmallVector<SDValue, 4> Ops;
11466 for (unsigned j = 0; j != 2; ++j) {
11467 SDValue SOp = SetCC->getOperand(j);
// The load operand is substituted directly; the other operand is extended.
11468 if (SOp == OrigLoad)
11469 Ops.push_back(ExtLoad);
11471 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
// Operand 2 is the condition code; it is carried over unchanged.
11474 Ops.push_back(SetCC->getOperand(2));
11475 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
11479 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
11480 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11481 SDValue N0 = N->getOperand(0);
11482 EVT DstVT = N->getValueType(0);
11483 EVT SrcVT = N0.getValueType();
11485 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11486 N->getOpcode() == ISD::ZERO_EXTEND) &&
11487 "Unexpected node type (not an extend)!");
11489 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11490 // For example, on a target with legal v4i32, but illegal v8i32, turn:
11491 // (v8i32 (sext (v8i16 (load x))))
11493 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11494 // (v4i32 (sextload (x + 16)))))
11495 // Where uses of the original load, i.e.:
11496 // (v8i16 (load x))
11497 // are replaced with:
11498 // (v8i16 (truncate
11499 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11500 // (v4i32 (sextload (x + 16)))))))
11502 // This combine is only applicable to illegal, but splittable, vectors.
11503 // All legal types, and illegal non-vector types, are handled elsewhere.
11504 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11506 if (N0->getOpcode() != ISD::LOAD)
11509 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Require a simple (non-volatile/non-atomic), unindexed, non-extending load
// feeding a power-of-two-sized vector extend that the target wants widened.
11511 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11512 !N0.hasOneUse() || !LN0->isSimple() ||
11513 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11514 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11517 SmallVector<SDNode *, 4> SetCCs;
11518 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11521 ISD::LoadExtType ExtType =
11522 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11524 // Try to split the vector types to get down to legal types.
11525 EVT SplitSrcVT = SrcVT;
11526 EVT SplitDstVT = DstVT;
// Halve the vector types until the target supports the ext-load, or until
// the source cannot be split further.
11527 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11528 SplitSrcVT.getVectorNumElements() > 1) {
11529 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11530 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11533 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11536 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11539 const unsigned NumSplits =
11540 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11541 const unsigned Stride = SplitSrcVT.getStoreSize();
11542 SmallVector<SDValue, 4> Loads;
11543 SmallVector<SDValue, 4> Chains;
// Emit NumSplits consecutive ext-loads at increasing offsets from the base.
11545 SDValue BasePtr = LN0->getBasePtr();
11546 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11547 const unsigned Offset = Idx * Stride;
11548 const Align Align = commonAlignment(LN0->getAlign(), Offset);
11550 SDValue SplitLoad = DAG.getExtLoad(
11551 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11552 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11553 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11555 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11557 Loads.push_back(SplitLoad.getValue(0));
11558 Chains.push_back(SplitLoad.getValue(1));
// Merge the split chains and values back into single nodes.
11561 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11562 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11565 AddToWorklist(NewChain.getNode());
11567 CombineTo(N, NewValue);
11569 // Replace uses of the original load (before extension)
11570 // with a truncate of the concatenated sextloaded vectors.
11572 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11573 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11574 CombineTo(N0.getNode(), Trunc, NewChain);
11575 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11578 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11579 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11580 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11581 assert(N->getOpcode() == ISD::ZERO_EXTEND);
11582 EVT VT = N->getValueType(0);
11583 EVT OrigVT = N->getOperand(0).getValueType();
// If the zext is free anyway, there is nothing to gain from rewriting.
11584 if (TLI.isZExtFree(OrigVT, VT))
// Match the logic op: (and/or/xor X, constant), legal in the wide type.
11588 SDValue N0 = N->getOperand(0);
11589 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11590 N0.getOpcode() == ISD::XOR) ||
11591 N0.getOperand(1).getOpcode() != ISD::Constant ||
11592 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
// Match the shift op: (shl/srl Y, constant), legal in the wide type.
11596 SDValue N1 = N0->getOperand(0);
11597 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11598 N1.getOperand(1).getOpcode() != ISD::Constant ||
11599 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
// Match the load at the bottom of the chain; it must be zext-able to VT and
// must not already be a sign-extending or indexed load.
11603 if (!isa<LoadSDNode>(N1.getOperand(0)))
11605 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11606 EVT MemVT = Load->getMemoryVT();
11607 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11608 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11612 // If the shift op is SHL, the logic op must be AND, otherwise the result
11614 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11617 if (!N0.hasOneUse() || !N1.hasOneUse())
11620 SmallVector<SDNode*, 4> SetCCs;
11621 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11622 ISD::ZERO_EXTEND, SetCCs, TLI))
11625 // Actually do the transformation.
11626 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11627 Load->getChain(), Load->getBasePtr(),
11628 Load->getMemoryVT(), Load->getMemOperand());
11631 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
// Zero-extend the logic-op mask to the wide type.
11634 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11636 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11637 DAG.getConstant(Mask, DL0, VT));
11639 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
// If the old load is now only used here, just replace its chain; otherwise
// keep other users working via a truncate of the widened load.
11641 if (SDValue(Load, 0).hasOneUse()) {
11642 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11644 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11645 Load->getValueType(0), ExtLoad);
11646 CombineTo(Load, Trunc, ExtLoad.getValue(1));
11649 // N0 is dead at this point.
11650 recursivelyDeleteUnusedNodes(N0.getNode());
11652 return SDValue(N,0); // Return N so it doesn't get rechecked!
11655 /// If we're narrowing or widening the result of a vector select and the final
11656 /// size is the same size as a setcc (compare) feeding the select, then try to
11657 /// apply the cast operation to the select's operands because matching vector
11658 /// sizes for a select condition and other operands should be more efficient.
11659 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11660 unsigned CastOpcode = Cast->getOpcode();
11661 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11662 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11663 CastOpcode == ISD::FP_ROUND) &&
11664 "Unexpected opcode for vector select narrowing/widening");
11666 // We only do this transform before legal ops because the pattern may be
11667 // obfuscated by target-specific operations after legalization. Do not create
11668 // an illegal select op, however, because that may be difficult to lower.
11669 EVT VT = Cast->getValueType(0);
11670 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
// Only a single-use vselect whose condition is a setcc is a candidate.
11673 SDValue VSel = Cast->getOperand(0);
11674 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11675 VSel.getOperand(0).getOpcode() != ISD::SETCC)
11678 // Does the setcc have the same vector size as the casted select?
11679 SDValue SetCC = VSel.getOperand(0);
11680 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11681 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11684 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11685 SDValue A = VSel.getOperand(1);
11686 SDValue B = VSel.getOperand(2);
11687 SDValue CastA, CastB;
11689 if (CastOpcode == ISD::FP_ROUND) {
11690 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11691 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11692 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11694 CastA = DAG.getNode(CastOpcode, DL, VT, A);
11695 CastB = DAG.getNode(CastOpcode, DL, VT, B);
11697 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11700 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11701 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11702 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11703 const TargetLowering &TLI, EVT VT,
11704 bool LegalOperations, SDNode *N,
11705 SDValue N0, ISD::LoadExtType ExtLoadType) {
11706 SDNode *N0Node = N0.getNode();
// The inner load must already extend the same way (sext for SEXTLOAD /
// zext for ZEXTLOAD) or be an any-extending load.
11707 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11708 : ISD::isZEXTLoad(N0Node);
11709 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11710 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11713 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11714 EVT MemVT = LN0->getMemoryVT();
11715 if ((LegalOperations || !LN0->isSimple() ||
11717 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
// Build a single wider ext-load directly to VT and splice its chain in for
// the original load's chain.
11721 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11722 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11723 Combiner.CombineTo(N, ExtLoad);
11724 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11725 if (LN0->use_empty())
11726 Combiner.recursivelyDeleteUnusedNodes(LN0);
11727 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11730 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11731 // Only generate vector extloads when 1) they're legal, and 2) they are
11732 // deemed desirable by the target.
11733 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11734 const TargetLowering &TLI, EVT VT,
11735 bool LegalOperations, SDNode *N, SDValue N0,
11736 ISD::LoadExtType ExtLoadType,
11737 ISD::NodeType ExtOpc) {
11738 // TODO: isFixedLengthVector() should be removed and any negative effects on
11739 // code generation being the result of that target's implementation of
11740 // isVectorLoadExtDesirable().
11741 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11742 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11743 ((LegalOperations || VT.isFixedLengthVector() ||
11744 !cast<LoadSDNode>(N0)->isSimple()) &&
11745 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11748 bool DoXform = true;
11749 SmallVector<SDNode *, 4> SetCCs;
// A multi-use load is only worth widening if its other users can be
// extended too (profitability check collects affected setcc users).
11750 if (!N0.hasOneUse())
11751 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11753 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11757 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11758 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11759 LN0->getBasePtr(), N0.getValueType(),
11760 LN0->getMemOperand());
11761 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11762 // If the load value is used only by N, replace it via CombineTo N.
11763 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11764 Combiner.CombineTo(N, ExtLoad);
11765 if (NoReplaceTrunc) {
11766 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11767 Combiner.recursivelyDeleteUnusedNodes(LN0);
// Other users of the narrow load keep working through a truncate of the
// widened value.
11770 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11771 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11773 return SDValue(N, 0); // Return N so it doesn't get rechecked!
// fold ([s|z]ext (masked_load x)) -> ([s|z]ext-masked_load x), when the
// inner masked load is single-use, non-extending, and the target supports
// (and wants) the widened ext-load form.
11776 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11777 const TargetLowering &TLI, EVT VT,
11778 SDNode *N, SDValue N0,
11779 ISD::LoadExtType ExtLoadType,
11780 ISD::NodeType ExtOpc) {
11781 if (!N0.hasOneUse())
11784 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11785 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11788 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11791 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
// The pass-through value must be widened explicitly since masked-off lanes
// take it unmodified.
11795 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11796 SDValue NewLoad = DAG.getMaskedLoad(
11797 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11798 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11799 ExtLoadType, Ld->isExpandingLoad());
11800 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
// Fold an extended sign-bit test into a shift:
//   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
//   zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
// i.e. "X is non-negative" materialized directly from the inverted sign bit.
11804 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11805 bool LegalOperations) {
11806 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11807 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11809 SDValue SetCC = N->getOperand(0);
11810 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11811 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11814 SDValue X = SetCC.getOperand(0);
11815 SDValue Ones = SetCC.getOperand(1);
11816 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11817 EVT VT = N->getValueType(0);
11818 EVT XVT = X.getValueType();
11819 // setge X, C is canonicalized to setgt, so we do not need to match that
11820 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11821 // not require the 'not' op.
11822 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11823 // Invert and smear/shift the sign bit:
11824 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11825 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11827 unsigned ShCt = VT.getSizeInBits() - 1;
11828 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Bail out if the target considers this shift amount expensive.
11829 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11830 SDValue NotX = DAG.getNOT(DL, X, VT);
11831 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11833 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11834 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
// Combine sext(setcc ...). Tries, in order: using a wide vector setcc
// directly (when the target's boolean contents are 0/-1), widening the
// compare operands when the narrow compare type is unsupported, and finally
// materializing the result as (select (setcc), T, 0).
11840 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11841 SDValue N0 = N->getOperand(0);
11842 if (N0.getOpcode() != ISD::SETCC)
11845 SDValue N00 = N0.getOperand(0);
11846 SDValue N01 = N0.getOperand(1);
11847 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11848 EVT VT = N->getValueType(0);
11849 EVT N00VT = N00.getValueType();
11852 // Propagate fast-math-flags.
11853 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
11855 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11856 // the same size as the compared operands. Try to optimize sext(setcc())
11857 // if this is the case.
11858 if (VT.isVector() && !LegalOperations &&
11859 TLI.getBooleanContents(N00VT) ==
11860 TargetLowering::ZeroOrNegativeOneBooleanContent) {
11861 EVT SVT = getSetCCResultType(N00VT);
11863 // If we already have the desired type, don't change it.
11864 if (SVT != N0.getValueType()) {
11865 // We know that the # elements of the results is the same as the
11866 // # elements of the compare (and the # elements of the compare result
11867 // for that matter). Check to see that they are the same size. If so,
11868 // we know that the element size of the sext'd result matches the
11869 // element size of the compare operands.
11870 if (VT.getSizeInBits() == SVT.getSizeInBits())
11871 return DAG.getSetCC(DL, VT, N00, N01, CC);
11873 // If the desired elements are smaller or larger than the source
11874 // elements, we can use a matching integer vector type and then
11875 // truncate/sign extend.
11876 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11877 if (SVT == MatchingVecType) {
11878 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11879 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11883 // Try to eliminate the sext of a setcc by zexting the compare operands.
11884 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11885 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
// For signed predicates, the operands must be sign-extended (sextload)
// to preserve comparison semantics; unsigned predicates can zext.
11886 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11887 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11888 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11890 // We have an unsupported narrow vector compare op that would be legal
11891 // if extended to the destination type. See if the compare operands
11892 // can be freely extended to the destination type.
11893 auto IsFreeToExtend = [&](SDValue V) {
11894 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11896 // Match a simple, non-extended load that can be converted to a
11897 // legal {z/s}ext-load.
11898 // TODO: Allow widening of an existing {z/s}ext-load?
11899 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11900 ISD::isUNINDEXEDLoad(V.getNode()) &&
11901 cast<LoadSDNode>(V)->isSimple() &&
11902 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11905 // Non-chain users of this value must either be the setcc in this
11906 // sequence or extends that can be folded into the new {z/s}ext-load.
11907 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11909 // Skip uses of the chain and the setcc.
11910 SDNode *User = *UI;
11911 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11913 // Extra users must have exactly the same cast we are about to create.
11914 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11915 // is enhanced similarly.
11916 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11922 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11923 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11924 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11925 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11930 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11931 // Here, T can be 1 or -1, depending on the type of the setcc and
11932 // getBooleanContents().
11933 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11935 // To determine the "true" side of the select, we need to know the high bit
11936 // of the value returned by the setcc if it evaluates to true.
11937 // If the type of the setcc is i1, then the true case of the select is just
11938 // sext(i1 1), that is, -1.
11939 // If the type of the setcc is larger (say, i8) then the value of the high
11940 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11941 // of the appropriate width.
11942 SDValue ExtTrueVal = (SetCCWidth == 1)
11943 ? DAG.getAllOnesConstant(DL, VT)
11944 : DAG.getBoolConstant(true, DL, VT, N00VT);
11945 SDValue Zero = DAG.getConstant(0, DL, VT);
11946 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11949 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11950 EVT SetCCVT = getSetCCResultType(N00VT);
11951 // Don't do this transform for i1 because there's a select transform
11952 // that would reverse it.
11953 // TODO: We should not do this transform at all without a target hook
11954 // because a sext is likely cheaper than a select?
11955 if (SetCCVT.getScalarSizeInBits() != 1 &&
11956 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11957 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11958 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
// Combine visitor for ISD::SIGN_EXTEND. Tries a sequence of independent
// folds (constant folding, collapsing ext-of-ext, narrowing through
// truncate, forming sextloads, setcc/select rewrites, and algebraic
// rewrites in the destination type); each successful fold returns the
// replacement value immediately.
// NOTE(review): this extraction elides some original lines (several early
// returns / closing braces are not visible here).
11965 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11966   SDValue N0 = N->getOperand(0);
11967   EVT VT = N->getValueType(0);
11970   // sext(undef) = 0 because the top bit will all be the same.
11972     return DAG.getConstant(0, DL, VT);
11974   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11977   // fold (sext (sext x)) -> (sext x)
11978   // fold (sext (aext x)) -> (sext x)
11979   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11980     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11982   if (N0.getOpcode() == ISD::TRUNCATE) {
11983     // fold (sext (truncate (load x))) -> (sext (smaller load x))
11984     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11985     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11986       SDNode *oye = N0.getOperand(0).getNode();
11987       if (NarrowLoad.getNode() != N0.getNode()) {
11988         CombineTo(N0.getNode(), NarrowLoad);
11989         // CombineTo deleted the truncate, if needed, but not what's under it.
11990         AddToWorklist(oye);
11992       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11995     // See if the value being truncated is already sign extended. If so, just
11996     // eliminate the trunc/sext pair.
11997     SDValue Op = N0.getOperand(0);
11998     unsigned OpBits = Op.getScalarValueSizeInBits();
11999     unsigned MidBits = N0.getScalarValueSizeInBits();
12000     unsigned DestBits = VT.getScalarSizeInBits();
12001     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
12003     if (OpBits == DestBits) {
12004       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
12005       // bits, it is already ready.
12006       if (NumSignBits > DestBits-MidBits)
12008     } else if (OpBits < DestBits) {
12009       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
12010       // bits, just sext from i32.
12011       if (NumSignBits > OpBits-MidBits)
12012         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
12014       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
12015       // bits, just truncate to i32.
12016       if (NumSignBits > OpBits-MidBits)
12017         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
12020     // fold (sext (truncate x)) -> (sextinreg x).
12021     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
12022                                                  N0.getValueType())) {
12023       if (OpBits < DestBits)
12024         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
12025       else if (OpBits > DestBits)
12026         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
12027       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
12028                          DAG.getValueType(N0.getValueType()));
12032   // Try to simplify (sext (load x)).
12033   if (SDValue foldedExt =
12034           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12035                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
12038   if (SDValue foldedExt =
12039           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
12043   // fold (sext (load x)) to multiple smaller sextloads.
12044   // Only on illegal but splittable vectors.
12045   if (SDValue ExtLoad = CombineExtLoad(N))
12048   // Try to simplify (sext (sextload x)).
12049   if (SDValue foldedExt = tryToFoldExtOfExtload(
12050           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
12053   // fold (sext (and/or/xor (load x), cst)) ->
12054   //      (and/or/xor (sextload x), (sext cst))
12055   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12056        N0.getOpcode() == ISD::XOR) &&
12057       isa<LoadSDNode>(N0.getOperand(0)) &&
12058       N0.getOperand(1).getOpcode() == ISD::Constant &&
12059       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12060     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12061     EVT MemVT = LN00->getMemoryVT();
        // Only safe when the extending load is legal and the existing load is
        // not already zero-extending (a sextload can't replace a zextload).
12062     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
12063         LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
12064       SmallVector<SDNode*, 4> SetCCs;
12065       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
12066                                              ISD::SIGN_EXTEND, SetCCs, TLI);
12068         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
12069                                          LN00->getChain(), LN00->getBasePtr(),
12070                                          LN00->getMemoryVT(),
12071                                          LN00->getMemOperand());
        // Sign-extend the constant operand to the wide type so the logic op
        // is performed entirely in VT.
12072         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
12073         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
12074                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
12075         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
12076         bool NoReplaceTruncAnd = !N0.hasOneUse();
12077         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
12079         // If N0 has multiple uses, change other uses as well.
12080         if (NoReplaceTruncAnd) {
12082               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
12083           CombineTo(N0.getNode(), TruncAnd);
12085         if (NoReplaceTrunc) {
12086           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
12088           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
12089                                       LN00->getValueType(0), ExtLoad);
12090           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
12092         return SDValue(N,0); // Return N so it doesn't get rechecked!
12097   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
12100   if (SDValue V = foldSextSetcc(N))
12103   // fold (sext x) -> (zext x) if the sign bit is known zero.
12104   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
12105       DAG.SignBitIsZero(N0))
12106     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
12108   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12111   // Eliminate this sign extend by doing a negation in the destination type:
12112   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
12113   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
12114       isNullOrNullSplat(N0.getOperand(0)) &&
12115       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
12116       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
12117     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
12118     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
12120   // Eliminate this sign extend by doing a decrement in the destination type:
12121   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
12122   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
12123       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
12124       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12125       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
12126     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
12127     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
12130   // fold sext (not i1 X) -> add (zext i1 X), -1
12131   // TODO: This could be extended to handle bool vectors.
12132   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
12133       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
12134                             TLI.isOperationLegal(ISD::ADD, VT)))) {
12135     // If we can eliminate the 'not', the sext form should be better
12136     if (SDValue NewXor = visitXOR(N0.getNode())) {
12137       // Returning N0 is a form of in-visit replacement that may have
12139       if (NewXor.getNode() == N0.getNode()) {
12140         // Return SDValue here as the xor should have already been replaced in
12145       // Return a new sext with the new xor.
12146       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
      // The 'not' could not be eliminated: emit zext(X) + (-1), which equals
      // sext(not X) for an i1 X.
12149     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
12150     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
12153   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12159 // isTruncateOf - If N is a truncate of some other value, return true, record
12160 // the value being truncated in Op and which of Op's bits are zero/one in Known.
12161 // This function computes KnownBits to avoid a duplicated call to
12162 // computeKnownBits in the caller.
// In addition to a plain TRUNCATE, an i1 (setcc X, 0, ne) is also treated as
// a truncate-to-i1 of X when all bits of X above bit 0 are known zero.
12163 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
12164                          KnownBits &Known) {
12165   if (N->getOpcode() == ISD::TRUNCATE) {
12166     Op = N->getOperand(0);
12167     Known = DAG.computeKnownBits(Op);
      // Only the (setcc X, 0, ne) form with an i1 scalar result is handled
      // below; anything else is not a truncate.
12171   if (N.getOpcode() != ISD::SETCC ||
12172       N.getValueType().getScalarType() != MVT::i1 ||
12173       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
12176   SDValue Op0 = N->getOperand(0);
12177   SDValue Op1 = N->getOperand(1);
12178   assert(Op0.getValueType() == Op1.getValueType());
      // Pick the non-zero operand as the value being "truncated".
12180   if (isNullOrNullSplat(Op0))
12182   else if (isNullOrNullSplat(Op1))
12187   Known = DAG.computeKnownBits(Op);
      // The setcc acts as a truncate only if every bit of Op other than
      // bit 0 is known to be zero.
12189   return (Known.Zero | 1).isAllOnes();
12192 /// Given an extending node with a pop-count operand, if the target does not
12193 /// support a pop-count in the narrow source type but does support it in the
12194 /// destination type, widen the pop-count to the destination type.
///
/// Valid for zext and aext because widening the ctpop input with zeros does
/// not change the number of set bits. Returns the widened ctpop, or an empty
/// SDValue if the fold does not apply.
12195 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
12196   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
12197           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
12199   SDValue CtPop = Extend->getOperand(0);
12200   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
12203   EVT VT = Extend->getValueType(0);
12204   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      // Only profitable when the narrow ctpop is unsupported but the wide
      // one is supported.
12205   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
12206       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
12209   // zext (ctpop X) --> ctpop (zext X)
12211   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
12212   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
// Combine visitor for ISD::ZERO_EXTEND. Mirrors visitSIGN_EXTEND: tries
// constant folding, ext-of-ext collapsing, truncate elimination via known
// zero bits, zextload formation, setcc rewrites, and shift-specific folds.
// NOTE(review): this extraction elides some original lines (several early
// returns / closing braces are not visible here).
12215 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
12216   SDValue N0 = N->getOperand(0);
12217   EVT VT = N->getValueType(0);
12221     return DAG.getConstant(0, SDLoc(N), VT);
12223   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12226   // fold (zext (zext x)) -> (zext x)
12227   // fold (zext (aext x)) -> (zext x)
12228   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
12229     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),  VT,
12232   // fold (zext (truncate x)) -> (zext x) or
12233   //      (zext (truncate x)) -> (truncate x)
12234   // This is valid when the truncated bits of x are already zero.
12237   if (isTruncateOf(DAG, N0, Op, Known)) {
      // TruncatedBits is the set of bits of Op that the truncate discards
      // (empty when Op and N0 have the same scalar width).
12238     APInt TruncatedBits =
12239       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
12240       APInt(Op.getScalarValueSizeInBits(), 0) :
12241       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
12242                         N0.getScalarValueSizeInBits(),
12243                         std::min(Op.getScalarValueSizeInBits(),
12244                                  VT.getScalarSizeInBits()));
12245     if (TruncatedBits.isSubsetOf(Known.Zero))
12246       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12249   // fold (zext (truncate x)) -> (and x, mask)
12250   if (N0.getOpcode() == ISD::TRUNCATE) {
12251     // fold (zext (truncate (load x))) -> (zext (smaller load x))
12252     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
12253     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12254       SDNode *oye = N0.getOperand(0).getNode();
12255       if (NarrowLoad.getNode() != N0.getNode()) {
12256         CombineTo(N0.getNode(), NarrowLoad);
12257         // CombineTo deleted the truncate, if needed, but not what's under it.
12258         AddToWorklist(oye);
12260       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12263     EVT SrcVT = N0.getOperand(0).getValueType();
12264     EVT MinVT = N0.getValueType();
12266     // Try to mask before the extension to avoid having to generate a larger mask,
12267     // possibly over several sub-vectors.
12268     if (SrcVT.bitsLT(VT) && VT.isVector()) {
12269       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
12270                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
12271         SDValue Op = N0.getOperand(0);
12272         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12273         AddToWorklist(Op.getNode());
12274         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12275         // Transfer the debug info; the new node is equivalent to N0.
12276         DAG.transferDbgValues(N0, ZExtOrTrunc);
12277         return ZExtOrTrunc;
12281     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
12282       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12283       AddToWorklist(Op.getNode());
12284       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12285       // We may safely transfer the debug info describing the truncate node over
12286       // to the equivalent and operation.
12287       DAG.transferDbgValues(N0, And);
12292   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
12293   // if either of the casts is not free.
12294   if (N0.getOpcode() == ISD::AND &&
12295       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12296       N0.getOperand(1).getOpcode() == ISD::Constant &&
12297       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12298                            N0.getValueType()) ||
12299        !TLI.isZExtFree(N0.getValueType(), VT))) {
12300     SDValue X = N0.getOperand(0).getOperand(0);
12301     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
      // Zero-extend the mask constant so the AND clears the same bits in VT.
12302     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12304     return DAG.getNode(ISD::AND, DL, VT,
12305                        X, DAG.getConstant(Mask, DL, VT));
12308   // Try to simplify (zext (load x)).
12309   if (SDValue foldedExt =
12310           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12311                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12314   if (SDValue foldedExt =
12315           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
12319   // fold (zext (load x)) to multiple smaller zextloads.
12320   // Only on illegal but splittable vectors.
12321   if (SDValue ExtLoad = CombineExtLoad(N))
12324   // fold (zext (and/or/xor (load x), cst)) ->
12325   //      (and/or/xor (zextload x), (zext cst))
12326   // Unless (and (load x) cst) will match as a zextload already and has
12327   // additional users.
12328   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12329        N0.getOpcode() == ISD::XOR) &&
12330       isa<LoadSDNode>(N0.getOperand(0)) &&
12331       N0.getOperand(1).getOpcode() == ISD::Constant &&
12332       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12333     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12334     EVT MemVT = LN00->getMemoryVT();
        // A zextload can't replace an existing sextload.
12335     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
12336         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
12337       bool DoXform = true;
12338       SmallVector<SDNode*, 4> SetCCs;
12339       if (!N0.hasOneUse()) {
12340         if (N0.getOpcode() == ISD::AND) {
12341           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
12342           EVT LoadResultTy = AndC->getValueType(0);
12344           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
12349         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
12350                                           ISD::ZERO_EXTEND, SetCCs, TLI);
12352         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
12353                                          LN00->getChain(), LN00->getBasePtr(),
12354                                          LN00->getMemoryVT(),
12355                                          LN00->getMemOperand());
12356         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12358         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
12359                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
12360         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12361         bool NoReplaceTruncAnd = !N0.hasOneUse();
12362         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
12364         // If N0 has multiple uses, change other uses as well.
12365         if (NoReplaceTruncAnd) {
12367             DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
12368           CombineTo(N0.getNode(), TruncAnd);
12370         if (NoReplaceTrunc) {
12371           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
12373           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
12374                                       LN00->getValueType(0), ExtLoad);
12375           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
12377         return SDValue(N,0); // Return N so it doesn't get rechecked!
12382   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12383   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12384   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
12387   // Try to simplify (zext (zextload x)).
12388   if (SDValue foldedExt = tryToFoldExtOfExtload(
12389           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
12392   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
12395   if (N0.getOpcode() == ISD::SETCC) {
12396     // Propagate fast-math-flags.
12397     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12399     // Only do this before legalize for now.
12400     if (!LegalOperations && VT.isVector() &&
12401         N0.getValueType().getVectorElementType() == MVT::i1) {
12402       EVT N00VT = N0.getOperand(0).getValueType();
12403       if (getSetCCResultType(N00VT) == N0.getValueType())
12406       // We know that the # elements of the results is the same as the #
12407       // elements of the compare (and the # elements of the compare result for
12408       // that matter). Check to see that they are the same size. If so, we know
12409       // that the element size of the sext'd result matches the element size of
12410       // the compare operands.
12412       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
12413         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
12414         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
12415                                      N0.getOperand(1), N0.getOperand(2));
12416         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
12419       // If the desired elements are smaller or larger than the source
12420       // elements we can use a matching integer vector type and then
12421       // truncate/any extend followed by zext_in_reg.
12422       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12424           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
12425                       N0.getOperand(1), N0.getOperand(2));
12426       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
12427                                     N0.getValueType());
12430     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
12432     EVT N0VT = N0.getValueType();
12433     EVT N00VT = N0.getOperand(0).getValueType();
12434     if (SDValue SCC = SimplifySelectCC(
12435             DL, N0.getOperand(0), N0.getOperand(1),
12436             DAG.getBoolConstant(true, DL, N0VT, N00VT),
12437             DAG.getBoolConstant(false, DL, N0VT, N00VT),
12438             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12439       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
12442   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
12443   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
12444       isa<ConstantSDNode>(N0.getOperand(1)) &&
12445       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12447     SDValue ShAmt = N0.getOperand(1);
12448     if (N0.getOpcode() == ISD::SHL) {
12449       SDValue InnerZExt = N0.getOperand(0);
12450       // If the original shl may be shifting out bits, do not perform this
        // KnownZeroBits: number of high bits guaranteed zero by the inner zext.
12452       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
12453                                InnerZExt.getOperand(0).getValueSizeInBits();
12454       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
12460     // Ensure that the shift amount is wide enough for the shifted value.
12461     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
12462       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
12464     return DAG.getNode(N0.getOpcode(), DL, VT,
12465                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
12469   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12472   if (SDValue NewCtPop = widenCtPop(N, DAG))
12475   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
// Combine visitor for ISD::ANY_EXTEND. An aext places no requirement on the
// high bits, so folds here are more permissive than sext/zext: aext(undef)
// stays undef, aext(truncate x) collapses directly, and vector loads are
// folded to zext (no target supports load+any_ext on vectors in one op).
// NOTE(review): this extraction elides some original lines (several early
// returns / closing braces are not visible here).
12481 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
12482   SDValue N0 = N->getOperand(0);
12483   EVT VT = N->getValueType(0);
12485   // aext(undef) = undef
12487     return DAG.getUNDEF(VT);
12489   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12492   // fold (aext (aext x)) -> (aext x)
12493   // fold (aext (zext x)) -> (zext x)
12494   // fold (aext (sext x)) -> (sext x)
12495   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
12496       N0.getOpcode() == ISD::ZERO_EXTEND ||
12497       N0.getOpcode() == ISD::SIGN_EXTEND)
12498     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12500   // fold (aext (truncate (load x))) -> (aext (smaller load x))
12501   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12502   if (N0.getOpcode() == ISD::TRUNCATE) {
12503     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12504       SDNode *oye = N0.getOperand(0).getNode();
12505       if (NarrowLoad.getNode() != N0.getNode()) {
12506         CombineTo(N0.getNode(), NarrowLoad);
12507         // CombineTo deleted the truncate, if needed, but not what's under it.
12508         AddToWorklist(oye);
12510       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12514   // fold (aext (truncate x))
12515   if (N0.getOpcode() == ISD::TRUNCATE)
12516     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12518   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12519   // if the trunc is not free.
12520   if (N0.getOpcode() == ISD::AND &&
12521       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12522       N0.getOperand(1).getOpcode() == ISD::Constant &&
12523       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12524                           N0.getValueType())) {
12526     SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
12527     SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
12528     assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
12529     return DAG.getNode(ISD::AND, DL, VT, X, Y);
12532   // fold (aext (load x)) -> (aext (truncate (extload x)))
12533   // None of the supported targets knows how to perform load and any_ext
12534   // on vectors in one instruction, so attempt to fold to zext instead.
12535   if (VT.isVector()) {
12536     // Try to simplify (zext (load x)).
12537     if (SDValue foldedExt =
12538             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12539                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12541   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
12542              ISD::isUNINDEXEDLoad(N0.getNode()) &&
12543              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12544     bool DoXform = true;
12545     SmallVector<SDNode *, 4> SetCCs;
12546     if (!N0.hasOneUse())
12548           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
12550       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12551       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12552                                        LN0->getChain(), LN0->getBasePtr(),
12553                                        N0.getValueType(), LN0->getMemOperand());
12554       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
12555       // If the load value is used only by N, replace it via CombineTo N.
12556       bool NoReplaceTrunc = N0.hasOneUse();
12557       CombineTo(N, ExtLoad);
12558       if (NoReplaceTrunc) {
12559         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12560         recursivelyDeleteUnusedNodes(LN0);
          // Otherwise keep the narrow value alive for other users via a
          // truncate of the new extending load.
12563             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12564         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12566       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12570   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12571   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12572   // fold (aext ( extload x)) -> (aext (truncate (extload x)))
12573   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12574       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
12575     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12576     ISD::LoadExtType ExtType = LN0->getExtensionType();
12577     EVT MemVT = LN0->getMemoryVT();
12578     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
        // Re-issue the existing extending load directly at the wider type.
12579       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12580                                        VT, LN0->getChain(), LN0->getBasePtr(),
12581                                        MemVT, LN0->getMemOperand());
12582       CombineTo(N, ExtLoad);
12583       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12584       recursivelyDeleteUnusedNodes(LN0);
12585       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12589   if (N0.getOpcode() == ISD::SETCC) {
12590     // Propagate fast-math-flags.
12591     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12594     // aext(setcc) -> vsetcc
12595     // aext(setcc) -> truncate(vsetcc)
12596     // aext(setcc) -> aext(vsetcc)
12597     // Only do this before legalize for now.
12598     if (VT.isVector() && !LegalOperations) {
12599       EVT N00VT = N0.getOperand(0).getValueType();
12600       if (getSetCCResultType(N00VT) == N0.getValueType())
12603       // We know that the # elements of the results is the same as the
12604       // # elements of the compare (and the # elements of the compare result
12605       // for that matter). Check to see that they are the same size. If so,
12606       // we know that the element size of the sext'd result matches the
12607       // element size of the compare operands.
12608       if (VT.getSizeInBits() == N00VT.getSizeInBits())
12609         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12611                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
12613       // If the desired elements are smaller or larger than the source
12614       // elements we can use a matching integer vector type and then
12615       // truncate/any extend
12616       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12618           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
12620                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
12621       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12624     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12626     if (SDValue SCC = SimplifySelectCC(
12627             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12628             DAG.getConstant(0, DL, VT),
12629             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12633   if (SDValue NewCtPop = widenCtPop(N, DAG))
12636   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
// Combine visitor for ISD::AssertSext / ISD::AssertZext nodes. Collapses
// redundant asserts, merges assert/truncate/assert sandwiches into a single
// stronger assert, and hoists an AssertZext above a truncate of an
// AssertSext when the zext assertion is the narrower (stronger) one.
12642 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12643   unsigned Opcode = N->getOpcode();
12644   SDValue N0 = N->getOperand(0);
12645   SDValue N1 = N->getOperand(1);
12646   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12648   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12649   if (N0.getOpcode() == Opcode &&
12650       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12653   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12654       N0.getOperand(0).getOpcode() == Opcode) {
12655     // We have an assert, truncate, assert sandwich. Make one stronger assert
12656     // by asserting on the smallest asserted type to the larger source type.
12657     // This eliminates the later assert:
12658     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12659     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12661     SDValue BigA = N0.getOperand(0);
12662     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
        // The narrower asserted type subsumes the wider one.
12663     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12664     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12665     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12666                                     BigA.getOperand(0), MinAssertVTVal);
12667     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12670   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12671   // than X. Just move the AssertZext in front of the truncate and drop the
12673   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12674       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12675       Opcode == ISD::AssertZext) {
12676     SDValue BigA = N0.getOperand(0);
12677     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
        // Only valid when the zext assertion is strictly narrower than the
        // sext assertion it replaces.
12678     if (AssertVT.bitsLT(BigA_AssertVT)) {
12680       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12681                                       BigA.getOperand(0), N1);
12682       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
// Combine visitor for ISD::AssertAlign. Merges stacked alignment asserts
// (keeping the stronger one) and pushes the assert down onto the operands
// of trivial arithmetic so those ops become visible to other combines.
12689 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12692   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12693   SDValue N0 = N->getOperand(0);
12695   // Fold (assertalign (assertalign x, AL0), AL1) ->
12696   // (assertalign x, max(AL0, AL1))
12697   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12698     return DAG.getAssertAlign(DL, N0.getOperand(0),
12699                               std::max(AL, AAN->getAlign()));
12701   // In rare cases, there are trivial arithmetic ops in source operands. Sink
12702   // this assert down to source operands so that those arithmetic ops could be
12703   // exposed to the DAG combining.
12704   switch (N0.getOpcode()) {
      // (case labels for the handled opcodes are elided in this extraction —
      // presumably ADD/SUB-style pointer arithmetic; confirm against the
      // full source.)
12709     unsigned AlignShift = Log2(AL);
12710     SDValue LHS = N0.getOperand(0);
12711     SDValue RHS = N0.getOperand(1);
12712     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12713     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
      // Sinking is only sound when at least one operand already provides the
      // asserted alignment; tag the other operand with the assert if needed.
12714     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12715       if (LHSAlignShift < AlignShift)
12716         LHS = DAG.getAssertAlign(DL, LHS, AL);
12717       if (RHSAlignShift < AlignShift)
12718         RHS = DAG.getAssertAlign(DL, RHS, AL);
12719       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12728 /// If the result of a load is shifted/masked/truncated to an effectively
12729 /// narrower type, try to transform the load to a narrower type and/or
12730 /// use an extending load.
12731 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12732 unsigned Opc = N->getOpcode();
12734 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12735 SDValue N0 = N->getOperand(0);
12736 EVT VT = N->getValueType(0);
12739 // This transformation isn't valid for vector loads.
12743 // The ShAmt variable is used to indicate that we've consumed a right
12744 // shift. I.e. we want to narrow the width of the load by skipping to load the
12745 // ShAmt least significant bits.
12746 unsigned ShAmt = 0;
12747 // A special case is when the least significant bits from the load are masked
12748 // away, but using an AND rather than a right shift. HasShiftedOffset is used
12749 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12751 bool HasShiftedOffset = false;
12752 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12754 if (Opc == ISD::SIGN_EXTEND_INREG) {
12755 ExtType = ISD::SEXTLOAD;
12756 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12757 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12758 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12759 // value, or it may be shifting a higher subword, half or byte into the
12762 // Only handle shift with constant shift amount, and the shiftee must be a
12764 auto *LN = dyn_cast<LoadSDNode>(N0);
12765 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12768 // If the shift amount is larger than the memory type then we're not
12769 // accessing any of the loaded bytes.
12770 ShAmt = N1C->getZExtValue();
12771 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12772 if (MemoryWidth <= ShAmt)
12774 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12775 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12776 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12777 // If original load is a SEXTLOAD then we can't simply replace it by a
12778 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
12779 // followed by a ZEXT, but that is not handled at the moment). Similarly if
12780 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12781 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12782 LN->getExtensionType() == ISD::ZEXTLOAD) &&
12783 LN->getExtensionType() != ExtType)
12785 } else if (Opc == ISD::AND) {
12786 // An AND with a constant mask is the same as a truncate + zero-extend.
12787 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12791 const APInt &Mask = AndC->getAPIntValue();
12792 unsigned ActiveBits = 0;
12793 if (Mask.isMask()) {
12794 ActiveBits = Mask.countTrailingOnes();
12795 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
12796 HasShiftedOffset = true;
12801 ExtType = ISD::ZEXTLOAD;
12802 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12805 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12806 // a right shift. Here we redo some of those checks, to possibly adjust the
12807 // ExtVT even further based on "a masking AND". We could also end up here for
12808 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12809 // need to be done here as well.
12810 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12811 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12812 // Bail out when the SRL has more than one use. This is done for historical
12813 // (undocumented) reasons. Maybe intent was to guard the AND-masking below
12814 // check below? And maybe it could be non-profitable to do the transform in
12815 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12816 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
12817 if (!SRL.hasOneUse())
12820 // Only handle shift with constant shift amount, and the shiftee must be a
12822 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12823 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12827 // If the shift amount is larger than the input type then we're not
12828 // accessing any of the loaded bytes. If the load was a zextload/extload
12829 // then the result of the shift+trunc is zero/undef (handled elsewhere).
12830 ShAmt = SRL1C->getZExtValue();
12831 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12832 if (ShAmt >= MemoryWidth)
12835 // Because a SRL must be assumed to *need* to zero-extend the high bits
12836 // (as opposed to anyext the high bits), we can't combine the zextload
12837 // lowering of SRL and an sextload.
12838 if (LN->getExtensionType() == ISD::SEXTLOAD)
12841 // Avoid reading outside the memory accessed by the original load (could
12842 // happened if we only adjust the load base pointer by ShAmt). Instead we
12843 // try to narrow the load even further. The typical scenario here is:
12844 // (i64 (truncate (i96 (srl (load x), 64)))) ->
12845 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12846 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12847 // Don't replace sextload by zextload.
12848 if (ExtType == ISD::SEXTLOAD)
12850 // Narrow the load.
12851 ExtType = ISD::ZEXTLOAD;
12852 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12855 // If the SRL is only used by a masking AND, we may be able to adjust
12856 // the ExtVT to make the AND redundant.
12857 SDNode *Mask = *(SRL->use_begin());
12858 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12859 isa<ConstantSDNode>(Mask->getOperand(1))) {
12860 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12861 if (ShiftMask.isMask()) {
12862 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12863 ShiftMask.countTrailingOnes());
12864 // If the mask is smaller, recompute the type.
12865 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12866 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12871 N0 = SRL.getOperand(0);
12874 // If the load is shifted left (and the result isn't shifted back right), we
12875 // can fold a truncate through the shift. The typical scenario is that N
12876 // points at a TRUNCATE here so the attempted fold is:
12877 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
12878 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12879 unsigned ShLeftAmt = 0;
12880 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12881 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12882 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12883 ShLeftAmt = N01->getZExtValue();
12884 N0 = N0.getOperand(0);
12888 // If we haven't found a load, we can't narrow it.
12889 if (!isa<LoadSDNode>(N0))
12892 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12893 // Reducing the width of a volatile load is illegal. For atomics, we may be
12894 // able to reduce the width provided we never widen again. (see D66309)
12895 if (!LN0->isSimple() ||
12896 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12899 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12900 unsigned LVTStoreBits =
12901 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12902 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12903 return LVTStoreBits - EVTStoreBits - ShAmt;
12906 // We need to adjust the pointer to the load by ShAmt bits in order to load
12907 // the correct bytes.
12908 unsigned PtrAdjustmentInBits =
12909 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12911 uint64_t PtrOff = PtrAdjustmentInBits / 8;
12912 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12914 // The original load itself didn't wrap, so an offset within it doesn't.
12916 Flags.setNoUnsignedWrap(true);
12917 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12918 TypeSize::Fixed(PtrOff), DL, Flags);
12919 AddToWorklist(NewPtr.getNode());
12922 if (ExtType == ISD::NON_EXTLOAD)
12923 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12924 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12925 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12927 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12928 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12929 NewAlign, LN0->getMemOperand()->getFlags(),
12932 // Replace the old load's chain with the new load's chain.
12933 WorklistRemover DeadNodes(*this);
12934 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12936 // Shift the result left, if we've swallowed a left shift.
12937 SDValue Result = Load;
12938 if (ShLeftAmt != 0) {
12939 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12940 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12942 // If the shift amount is as large as the result size (but, presumably,
12943 // no larger than the source) then the useful bits of the result are
12944 // zero; we can't simply return the shortened shift, because the result
12945 // of that operation is undefined.
12946 if (ShLeftAmt >= VT.getScalarSizeInBits())
12947 Result = DAG.getConstant(0, DL, VT);
12949 Result = DAG.getNode(ISD::SHL, DL, VT,
12950 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12953 if (HasShiftedOffset) {
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into lower bytes than it otherwise would have
// been, so we need to shl the loaded data into the correct position in the
12958 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12959 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12960 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12963 // Return the new loaded value.
/// Combine a SIGN_EXTEND_INREG node. Operand 0 is the value; operand 1 is a
/// VTSDNode naming the narrow type (ExtVT) whose sign bit is replicated into
/// all bits above it. The result type VT is the same as the input's type.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // ExtVT: the implicit "from" type being sign-extended.
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();

  // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (sext_in_reg c1) -> c1
  // Re-emitting the node with constant input lets getNode constant-fold it.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= ExtVTBits ||
         DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
      N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
      N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    unsigned DstElts = N0.getValueType().getVectorMinNumElements();
    unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
    bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
    // Only the low DstElts elements of the source survive the *_vector_inreg.
    APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
    // For a zext source we may only match the exact width; a narrower source
    // would have known-zero (not sign) bits above it.
    if ((N00Bits == ExtVTBits ||
         (!IsZext && (N00Bits < ExtVTBits ||
                      DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == ExtVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      // NOTE(review): ISD::SIGN_EXTEND is a unary opcode, yet N1 (the
      // VTSDNode) is passed as a second operand here, creating a
      // two-operand SIGN_EXTEND node. Looks unintentional — confirm against
      // upstream, which passes only N00.
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = reduceLoadWidth(N))

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getBasePtr(), ExtVT,
                                     LN0->getMemOperand());
    // Replace both the sext_in_reg and the original load so the old chain
    // users are rewired to the new sextload.
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0); // Return N so it doesn't get rechecked!

  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getBasePtr(), ExtVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!

  // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
  // ignore it if the masked load is already sign extended
  if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
    if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
        Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
      SDValue ExtMaskedLoad = DAG.getMaskedLoad(
          VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
          Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
          Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
      CombineTo(N, ExtMaskedLoad);
      CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!

  // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    if (SDValue(GN0, 0).hasOneUse() &&
        ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};

      SDValue ExtLoad = DAG.getMaskedGather(
          DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
          GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);

      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      AddToWorklist(ExtLoad.getNode());
      return SDValue(N, 0); // Return N so it doesn't get rechecked!

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
/// Common combines shared by ANY/SIGN/ZERO_EXTEND_VECTOR_INREG nodes:
/// fold undef input to zero, constant-fold the extend, and prune with
/// demanded-elements analysis.
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
    return DAG.getConstant(0, SDLoc(N), VT);

  // Constant-fold an extend of a constant/buildvector input.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))

  // Simplify based on which result elements are actually demanded.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);
/// Combine a TRUNCATE node: cancel redundant extends, narrow loads, and push
/// the truncate toward operands where the narrower operation is cheaper.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  // Endianness matters below when picking which sub-element survives a
  // truncating bitcast/extract.
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  // getNode constant-folds; only use the result if it produced a new node.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);

  // Try to narrow a truncate-of-sext_in_reg to the destination type:
  // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
  if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
    SDValue X = N0.getOperand(0);
    SDValue ExtVal = N0.getOperand(1);
    EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
    // Only valid while the inreg type still fits inside the truncated type.
    if (ExtVT.bitsLT(VT)) {
      SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))

  // Fold extract-and-trunc into a narrow extract. For example:
  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  // i32 y = TRUNCATE(i64 x)
  // v16i8 b = BITCAST (v2i64 val)
  // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    // Re-view the source vector as a vector of TrTy with SizeRatio times the
    // elements; total bit width is unchanged (asserted below).
    auto EltCnt = VecTy.getVectorElementCount();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
    auto NewEltCnt = EltCnt * SizeRatio;

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      // On big-endian targets the truncated (low) part is the last sub-element
      // of the group, not the first.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getVectorIdxConstant(Index, DL));

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Safe only if the shift amount provably fits in the narrow type's range.
    if (Known.countMaxActiveBits() <= Log2_32(Size)) {
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);

  // Try matching a usubsat pattern feeding this truncate.
  if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
      // Avoid creating illegal types if running after type legalizer.
      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    return DAG.getBuildVector(VT, DL, TruncOps);

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  // (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts = BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand of the original buildvector.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = reduceLoadWidth(N))

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Rewire chain users of the old load to the replacement.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
      // Stop if more than one members are non-undef.
      // Record the narrowed type of each concat operand.
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorElementCount()));

      // All operands undef: the whole truncate is undef.
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT VecSrcVT = VecSrc.getValueType();
    if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
      // The low part is element 0 on little-endian, the last element on
      // big-endian.
      unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
                         DAG.getVectorIdxConstant(Idx, SL));

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (truncate (extract_subvector(ext x))) ->
  // (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  switch (N0.getOpcode()) {
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
  case ISD::ADDCARRY:
    // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
    // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
    // When the adde's carry is not used.
    // We only do this for ADDCARRY before operation legalization.
    if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
         TLI.isOperationLegal(N0.getOpcode(), VT)) &&
        N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
      SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
      SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
      SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
      return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must ensure that we don't
    // introduce an extra truncate.
    if (!LegalOperations && N0.hasOneUse() &&
        N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
            VT.getScalarSizeInBits() &&
        hasOperation(N0.getOpcode(), VT)) {
      return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
13495 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13496 SDValue Elt = N->getOperand(i);
13497 if (Elt.getOpcode() != ISD::MERGE_VALUES)
13498 return Elt.getNode();
13499 return Elt.getOperand(Elt.getResNo()).getNode();
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR always has the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // Require two simple, single-use, non-extending loads in the same address
  // space; otherwise the merge is unsafe or unprofitable.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
      !LD1->hasOneUse() || !LD2->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())

  bool LD1Fast = false;
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // Merge only if a wide load is legal (or pre-legalization), the two loads
  // are provably consecutive, and the target says the wide access is fast.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
    return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                       LD1->getPointerInfo(), LD1->getAlign());
13534 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13535 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13536 // and Lo parts; on big-endian machines it doesn't.
13537 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
/// Try to turn (bitcast (logic-op (bitcast fp X), SignMaskConst)) back into
/// the equivalent FP operation (fabs/fneg) when the target preserves FP bits
/// through integer logic.
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())

  // Per logic opcode, pick the FP opcode and the exact constant mask the
  // logic operand must equal for the fold to apply.
  switch (N0.getOpcode()) {
    // AND with ~signbit clears the sign -> fabs.
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    // XOR with signbit flips the sign -> fneg.
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    // OR with signbit forces the sign on -> fneg(fabs) (see OR special case
    // below).
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    NumFPLogicOpsConv++;
    // OR sets the sign bit unconditionally: that is fneg of fabs.
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13593 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13594 SDValue N0 = N->getOperand(0);
13595 EVT VT = N->getValueType(0);
13598 return DAG.getUNDEF(VT);
13600 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13601 // Only do this before legalize types, unless both types are integer and the
13602 // scalar type is legal. Only do this before legalize ops, since the target
13603 // maybe depending on the bitcast.
13604 // First check to see if this is all constant.
13605 // TODO: Support FP bitcasts after legalize types.
13606 if (VT.isVector() &&
13608 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13609 TLI.isTypeLegal(VT.getVectorElementType()))) &&
13610 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
13611 cast<BuildVectorSDNode>(N0)->isConstant())
13612 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13613 VT.getVectorElementType());
13615 // If the input is a constant, let getNode fold it.
13616 if (isIntOrFPConstant(N0)) {
13617 // If we can't allow illegal operations, we need to check that this is just
13618 // a fp -> int or int -> conversion and that the resulting operation will
13620 if (!LegalOperations ||
13621 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13622 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13623 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13624 TLI.isOperationLegal(ISD::Constant, VT))) {
13625 SDValue C = DAG.getBitcast(VT, N0);
13626 if (C.getNode() != N)
13631 // (conv (conv x, t1), t2) -> (conv x, t2)
13632 if (N0.getOpcode() == ISD::BITCAST)
13633 return DAG.getBitcast(VT, N0.getOperand(0));
13635 // fold (conv (load x)) -> (load (conv*)x)
13636 // If the resultant load doesn't need a higher alignment than the original!
13637 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13638 // Do not remove the cast if the types differ in endian layout.
13639 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13640 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13641 // If the load is volatile, we only want to change the load type if the
13642 // resulting load is legal. Otherwise we might increase the number of
13643 // memory accesses. We don't care if the original type was legal or not
13644 // as we assume software couldn't rely on the number of accesses of an
13646 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13647 TLI.isOperationLegal(ISD::LOAD, VT))) {
13648 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13650 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13651 *LN0->getMemOperand())) {
13653 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13654 LN0->getPointerInfo(), LN0->getAlign(),
13655 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13656 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13661 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13664 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13665 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13668 // fold (bitcast (fneg x)) ->
13669 // flipbit = signbit
13670 // (xor (bitcast x) (build_pair flipbit, flipbit))
13672 // fold (bitcast (fabs x)) ->
13673 // flipbit = (and (extract_element (bitcast x), 0), signbit)
13674 // (xor (bitcast x) (build_pair flipbit, flipbit))
13675 // This often reduces constant pool loads.
13676 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13677 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13678 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
13679 !N0.getValueType().isVector()) {
13680 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13681 AddToWorklist(NewConv.getNode());
13684 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13685 assert(VT.getSizeInBits() == 128);
13686 SDValue SignBit = DAG.getConstant(
13687 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13689 if (N0.getOpcode() == ISD::FNEG) {
13691 AddToWorklist(FlipBit.getNode());
13693 assert(N0.getOpcode() == ISD::FABS);
13695 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13696 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13698 AddToWorklist(Hi.getNode());
13699 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13700 AddToWorklist(FlipBit.getNode());
13703 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13704 AddToWorklist(FlipBits.getNode());
13705 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13707 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13708 if (N0.getOpcode() == ISD::FNEG)
13709 return DAG.getNode(ISD::XOR, DL, VT,
13710 NewConv, DAG.getConstant(SignBit, DL, VT));
13711 assert(N0.getOpcode() == ISD::FABS);
13712 return DAG.getNode(ISD::AND, DL, VT,
13713 NewConv, DAG.getConstant(~SignBit, DL, VT));
13716 // fold (bitconvert (fcopysign cst, x)) ->
13717 // (or (and (bitconvert x), sign), (and cst, (not sign)))
13718 // Note that we don't handle (copysign x, cst) because this can always be
13719 // folded to an fneg or fabs.
13722 // fold (bitcast (fcopysign cst, x)) ->
13723 // flipbit = (and (extract_element
13724 // (xor (bitcast cst), (bitcast x)), 0),
13726 // (xor (bitcast cst) (build_pair flipbit, flipbit))
13727 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
13728 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
13730 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13731 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13732 if (isTypeLegal(IntXVT)) {
13733 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13734 AddToWorklist(X.getNode());
13736 // If X has a different width than the result/lhs, sext it or truncate it.
13737 unsigned VTWidth = VT.getSizeInBits();
13738 if (OrigXWidth < VTWidth) {
13739 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13740 AddToWorklist(X.getNode());
13741 } else if (OrigXWidth > VTWidth) {
13742 // To get the sign bit in the right place, we have to shift it right
13743 // before truncating.
13745 X = DAG.getNode(ISD::SRL, DL,
13746 X.getValueType(), X,
13747 DAG.getConstant(OrigXWidth-VTWidth, DL,
13748 X.getValueType()));
13749 AddToWorklist(X.getNode());
13750 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13751 AddToWorklist(X.getNode());
13754 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13755 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13756 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13757 AddToWorklist(Cst.getNode());
13758 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13759 AddToWorklist(X.getNode());
13760 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13761 AddToWorklist(XorResult.getNode());
13762 SDValue XorResult64 = DAG.getNode(
13763 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13764 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13765 SDLoc(XorResult)));
13766 AddToWorklist(XorResult64.getNode());
13768 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13769 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13770 AddToWorklist(FlipBit.getNode());
13772 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13773 AddToWorklist(FlipBits.getNode());
13774 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13776 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13777 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13778 X, DAG.getConstant(SignBit, SDLoc(X), VT));
13779 AddToWorklist(X.getNode());
13781 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13782 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13783 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13784 AddToWorklist(Cst.getNode());
13786 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13790 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13791 if (N0.getOpcode() == ISD::BUILD_PAIR)
13792 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13795 // Remove double bitcasts from shuffles - this is often a legacy of
13796 // XformToShuffleWithZero being used to combine bitmaskings (of
13797 // float vectors bitcast to integer vectors) into shuffles.
13798 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13799 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13800 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13801 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13802 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13803 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13805 // If operands are a bitcast, peek through if it casts the original VT.
13806 // If operands are a constant, just bitcast back to original VT.
13807 auto PeekThroughBitcast = [&](SDValue Op) {
13808 if (Op.getOpcode() == ISD::BITCAST &&
13809 Op.getOperand(0).getValueType() == VT)
13810 return SDValue(Op.getOperand(0));
13811 if (Op.isUndef() || isAnyConstantBuildVector(Op))
13812 return DAG.getBitcast(VT, Op);
13816 // FIXME: If either input vector is bitcast, try to convert the shuffle to
13817 // the result type of this bitcast. This would eliminate at least one
13818 // bitcast. See the transform in InstCombine.
13819 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13820 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13825 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13826 SmallVector<int, 8> NewMask;
13827 for (int M : SVN->getMask())
13828 for (int i = 0; i != MaskScale; ++i)
13829 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13831 SDValue LegalShuffle =
13832 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13834 return LegalShuffle;
// NOTE(review): this dump elides source lines (gaps in the embedded line
// numbers, e.g. the closing brace after 13842); code kept verbatim — verify
// against upstream before editing.
// Combine a BUILD_PAIR node: if its two operands are consecutive loads,
// CombineConsecutiveLoads replaces the pair with a single wider load of VT.
13840 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13841 EVT VT = N->getValueType(0);
13842 return CombineConsecutiveLoads(N, VT);
// NOTE(review): this dump elides source lines (embedded line-number gaps);
// in particular the body of the first `if` (presumably `return N0;`) and the
// function tail are missing. Code kept verbatim — verify against upstream.
// Simplify an ISD::FREEZE node.
13845 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13846 SDValue N0 = N->getOperand(0);
// Freeze is a no-op when the operand is already known not to be undef/poison.
13848 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13851 // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
13852 // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
13853 if (N0.getOpcode() == ISD::BITCAST)
13854 return DAG.getBitcast(N->getValueType(0),
13855 DAG.getNode(ISD::FREEZE, SDLoc(N0),
13856 N0.getOperand(0).getValueType(),
13857 N0.getOperand(0)));
// NOTE(review): this dump elides source lines (embedded line-number gaps),
// e.g. the declaration of `DL` used at 13932/13936 and several closing
// braces / bail-out returns. Code kept verbatim — verify against upstream.
13862 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13863 /// operands. DstEltVT indicates the destination element value type.
/// Constant-folds a bitcast of a BUILD_VECTOR by re-expressing the same raw
/// bits as a BUILD_VECTOR of DstEltVT elements. Handles three cases below:
/// same-width element conversion, FP source (route through integers), and
/// integer<->integer of differing widths (via getConstantRawBits).
13864 SDValue DAGCombiner::
13865 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13866 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13868 // If this is already the right type, we're done.
13869 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13871 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13872 unsigned DstBitSize = DstEltVT.getSizeInBits();
13874 // If this is a conversion of N elements of one type to N elements of another
13875 // type, convert each element. This handles FP<->INT cases.
13876 if (SrcBitSize == DstBitSize) {
13877 SmallVector<SDValue, 8> Ops;
13878 for (SDValue Op : BV->op_values()) {
13879 // If the vector element type is not legal, the BUILD_VECTOR operands
13880 // are promoted and implicitly truncated. Make that explicit here.
13881 if (Op.getValueType() != SrcEltVT)
13882 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
// Each element is bitcast 1:1; element count is unchanged.
13883 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13884 AddToWorklist(Ops.back().getNode());
13886 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13887 BV->getValueType(0).getVectorNumElements());
13888 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13891 // Otherwise, we're growing or shrinking the elements. To avoid having to
13892 // handle annoying details of growing/shrinking FP values, we convert them to
// ... integers first (recursive call below re-enters with an integer SrcEltVT).
13894 if (SrcEltVT.isFloatingPoint()) {
13895 // Convert the input float vector to a int vector where the elements are the
13897 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13898 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13902 // Now we know the input is an integer vector. If the output is a FP type,
13903 // convert to integer first, then to FP of the right size.
13904 if (DstEltVT.isFloatingPoint()) {
13905 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13906 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13908 // Next, convert to FP elements of the same size.
13909 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13912 // Okay, we know the src/dst types are both integers of differing types.
13913 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13915 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13916 // BuildVectorSDNode?
13917 auto *BVN = cast<BuildVectorSDNode>(BV);
13919 // Extract the constant raw bit data.
13920 BitVector UndefElements;
13921 SmallVector<APInt> RawBits;
13922 bool IsLE = DAG.getDataLayout().isLittleEndian();
// getConstantRawBits repacks the vector's bits into DstBitSize-wide chunks;
// failure (non-constant operands) falls through — the elided line after this
// condition is presumably `return SDValue();` (TODO confirm upstream).
13923 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13927 SmallVector<SDValue, 8> Ops;
13928 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13929 if (UndefElements[I])
13930 Ops.push_back(DAG.getUNDEF(DstEltVT))
13932 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13935 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13936 return DAG.getBuildVector(VT, DL, Ops);
// NOTE(review): closing brace elided in this dump (line-number gap after
// 13945); code kept verbatim.
13939 // Returns true if floating point contraction is allowed on the FMUL-SDValue
// `N`, i.e. either a global option (-fp-contract=fast / unsafe-fp-math) or
// the node's own `contract` fast-math flag permits fusing it into an FMA.
13941 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13942 assert(N.getOpcode() == ISD::FMUL);
13944 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13945 N->getFlags().hasAllowContract();
// NOTE(review): closing brace elided in this dump; code kept verbatim.
13948 // Returns true if `N` can assume no infinities involved in its computation.
// True when either the global -enable-no-infs-fp-math option is set or the
// node itself carries the `ninf` fast-math flag.
13949 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13950 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
// NOTE(review): this dump elides source lines (embedded line-number gaps):
// the declarations of `SL` and `HasFMA`, early `return SDValue()` exits,
// several closing braces, and trailing operands of some getNode calls are
// missing. Code kept verbatim — verify against upstream before editing.
13953 /// Try to perform FMA combining on a given FADD node.
/// Rewrites (fadd (fmul x, y), z) and related patterns into ISD::FMA/FMAD
/// when target hooks and fast-math/contract flags allow it. Returns the
/// fused node, or (in elided paths) SDValue() when no fold applies.
13954 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13955 SDValue N0 = N->getOperand(0);
13956 SDValue N1 = N->getOperand(1);
13957 EVT VT = N->getValueType(0);
13960 const TargetOptions &Options = DAG.getTarget().Options;
13962 // Floating-point multiply-add with intermediate rounding.
13963 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13965 // Floating-point multiply-add without intermediate rounding.
// (the `bool HasFMA =` declaration line is elided from this dump)
13967 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13968 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13970 // No valid opcode, do not combine.
13971 if (!HasFMAD && !HasFMA)
13974 bool CanReassociate =
13975 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13976 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13977 Options.UnsafeFPMath || HasFMAD);
13978 // If the addition is not contractable, do not combine.
13979 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
// Defer to the machine combiner when the target prefers doing fusion there.
13982 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13985 // Always prefer FMAD to FMA for precision.
13986 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13987 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// Predicate: is this node already a fused multiply-add?
13989 auto isFusedOp = [&](SDValue N) {
13990 unsigned Opcode = N.getOpcode();
13991 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13994 // Is the node an FMUL and contractable either due to global flags or
// ... the node's own `contract` flag (shadows the file-level static helper).
13996 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13997 if (N.getOpcode() != ISD::FMUL)
13999 return AllowFusionGlobally || N->getFlags().hasAllowContract();
14001 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
14002 // prefer to fold the multiply with fewer uses.
14003 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
// (the swap of N0/N1 performed inside this branch is elided from this dump)
14004 if (N0->use_size() > N1->use_size())
14008 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
14009 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
14010 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14011 N0.getOperand(1), N1);
14014 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
14015 // Note: Commutes FADD operands.
14016 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
14017 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
14018 N1.getOperand(1), N0);
14021 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
14022 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
14023 // This requires reassociation because it changes the order of operations.
// (the declarations of FMA/E selected by these branches are elided)
14025 if (CanReassociate && isFusedOp(N0) &&
14026 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
14027 N0.getOperand(2).hasOneUse()) {
14030 } else if (CanReassociate && isFusedOp(N1) &&
14031 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
14032 N1.getOperand(2).hasOneUse()) {
14037 SDValue A = FMA.getOperand(0);
14038 SDValue B = FMA.getOperand(1);
14039 SDValue C = FMA.getOperand(2).getOperand(0);
14040 SDValue D = FMA.getOperand(2).getOperand(1);
14041 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
14042 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
14045 // Look through FP_EXTEND nodes to do more combining.
14047 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
14048 if (N0.getOpcode() == ISD::FP_EXTEND) {
14049 SDValue N00 = N0.getOperand(0);
14050 if (isContractableFMUL(N00) &&
14051 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14052 N00.getValueType())) {
14053 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14054 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14055 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14060 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
14061 // Note: Commutes FADD operands.
14062 if (N1.getOpcode() == ISD::FP_EXTEND) {
14063 SDValue N10 = N1.getOperand(0);
14064 if (isContractableFMUL(N10) &&
14065 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14066 N10.getValueType())) {
14067 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14068 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
14069 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
14074 // More folding opportunities when target permits.
// (the enclosing `if (Aggressive)` guard line is presumably elided — TODO confirm)
14076 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
14077 // -> (fma x, y, (fma (fpext u), (fpext v), z))
14078 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14080 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
14081 DAG.getNode(PreferredFusedOpcode, SL, VT,
14082 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14083 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
14086 if (isFusedOp(N0)) {
14087 SDValue N02 = N0.getOperand(2);
14088 if (N02.getOpcode() == ISD::FP_EXTEND) {
14089 SDValue N020 = N02.getOperand(0);
14090 if (isContractableFMUL(N020) &&
14091 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14092 N020.getValueType())) {
14093 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
14094 N020.getOperand(0), N020.getOperand(1),
14100 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
14101 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
14102 // FIXME: This turns two single-precision and one double-precision
14103 // operation into two double-precision operations, which might not be
14104 // interesting for all targets, especially GPUs.
14105 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14107 return DAG.getNode(
14108 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
14109 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
14110 DAG.getNode(PreferredFusedOpcode, SL, VT,
14111 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14112 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
14114 if (N0.getOpcode() == ISD::FP_EXTEND) {
14115 SDValue N00 = N0.getOperand(0);
14116 if (isFusedOp(N00)) {
14117 SDValue N002 = N00.getOperand(2);
14118 if (isContractableFMUL(N002) &&
14119 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14120 N00.getValueType())) {
14121 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
14122 N002.getOperand(0), N002.getOperand(1),
14128 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
14129 // -> (fma y, z, (fma (fpext u), (fpext v), x))
14130 if (isFusedOp(N1)) {
14131 SDValue N12 = N1.getOperand(2);
14132 if (N12.getOpcode() == ISD::FP_EXTEND) {
14133 SDValue N120 = N12.getOperand(0);
14134 if (isContractableFMUL(N120) &&
14135 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14136 N120.getValueType())) {
14137 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
14138 N120.getOperand(0), N120.getOperand(1),
14144 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
14145 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
14146 // FIXME: This turns two single-precision and one double-precision
14147 // operation into two double-precision operations, which might not be
14148 // interesting for all targets, especially GPUs.
14149 if (N1.getOpcode() == ISD::FP_EXTEND) {
14150 SDValue N10 = N1.getOperand(0);
14151 if (isFusedOp(N10)) {
14152 SDValue N102 = N10.getOperand(2);
14153 if (isContractableFMUL(N102) &&
14154 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14155 N10.getValueType())) {
14156 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
14157 N102.getOperand(0), N102.getOperand(1),
// NOTE(review): this dump elides source lines (embedded line-number gaps):
// the declarations of `SL` and `HasFMA`, early `return SDValue()` exits,
// lambda closers/fallthrough returns, some getNode operands, and the
// function tail are missing. Code kept verbatim — verify against upstream.
14167 /// Try to perform FMA combining on a given FSUB node.
/// Rewrites (fsub (fmul x, y), z), (fsub x, (fmul y, z)) and fpext/fneg
/// variants into ISD::FMA/FMAD with negated operands, subject to target
/// hooks and fast-math/contract flags.
14168 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
14169 SDValue N0 = N->getOperand(0);
14170 SDValue N1 = N->getOperand(1);
14171 EVT VT = N->getValueType(0);
14174 const TargetOptions &Options = DAG.getTarget().Options;
14175 // Floating-point multiply-add with intermediate rounding.
14176 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
14178 // Floating-point multiply-add without intermediate rounding.
// (the `bool HasFMA =` declaration line is elided from this dump)
14180 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14181 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14183 // No valid opcode, do not combine.
14184 if (!HasFMAD && !HasFMA)
14187 const SDNodeFlags Flags = N->getFlags();
14188 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
14189 Options.UnsafeFPMath || HasFMAD);
14191 // If the subtraction is not contractable, do not combine.
14192 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
// Defer to the machine combiner when the target prefers doing fusion there.
14195 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
14198 // Always prefer FMAD to FMA for precision.
14199 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14200 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14201 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
14203 // Is the node an FMUL and contractable either due to global flags or
// ... the node's own `contract` flag (shadows the file-level static helper).
14205 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
14206 if (N.getOpcode() != ISD::FMUL)
14208 return AllowFusionGlobally || N->getFlags().hasAllowContract();
14211 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14212 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
14213 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
14214 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
14215 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
14220 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14221 // Note: Commutes FSUB operands.
14222 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
14223 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
14224 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14225 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
14226 YZ.getOperand(1), X);
14231 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
14232 // prefer to fold the multiply with fewer uses.
14233 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
14234 (N0->use_size() > N1->use_size())) {
14235 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
14236 if (SDValue V = tryToFoldXSubYZ(N0, N1))
14238 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
14239 if (SDValue V = tryToFoldXYSubZ(N0, N1))
14242 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14243 if (SDValue V = tryToFoldXYSubZ(N0, N1))
14245 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14246 if (SDValue V = tryToFoldXSubYZ(N0, N1))
14250 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
14251 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
14252 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
14253 SDValue N00 = N0.getOperand(0).getOperand(0);
14254 SDValue N01 = N0.getOperand(0).getOperand(1);
14255 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14256 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
14257 DAG.getNode(ISD::FNEG, SL, VT, N1));
14260 // Look through FP_EXTEND nodes to do more combining.
14262 // fold (fsub (fpext (fmul x, y)), z)
14263 // -> (fma (fpext x), (fpext y), (fneg z))
14264 if (N0.getOpcode() == ISD::FP_EXTEND) {
14265 SDValue N00 = N0.getOperand(0);
14266 if (isContractableFMUL(N00) &&
14267 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14268 N00.getValueType())) {
14269 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14270 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14271 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14272 DAG.getNode(ISD::FNEG, SL, VT, N1));
14276 // fold (fsub x, (fpext (fmul y, z)))
14277 // -> (fma (fneg (fpext y)), (fpext z), x)
14278 // Note: Commutes FSUB operands.
14279 if (N1.getOpcode() == ISD::FP_EXTEND) {
14280 SDValue N10 = N1.getOperand(0);
14281 if (isContractableFMUL(N10) &&
14282 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14283 N10.getValueType())) {
14284 return DAG.getNode(
14285 PreferredFusedOpcode, SL, VT,
14286 DAG.getNode(ISD::FNEG, SL, VT,
14287 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
14288 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
14292 // fold (fsub (fpext (fneg (fmul, x, y))), z)
14293 // -> (fneg (fma (fpext x), (fpext y), z))
14294 // Note: This could be removed with appropriate canonicalization of the
14295 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
14296 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
14297 // from implementing the canonicalization in visitFSUB.
14298 if (N0.getOpcode() == ISD::FP_EXTEND) {
14299 SDValue N00 = N0.getOperand(0);
14300 if (N00.getOpcode() == ISD::FNEG) {
14301 SDValue N000 = N00.getOperand(0);
14302 if (isContractableFMUL(N000) &&
14303 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14304 N00.getValueType())) {
// (the outer FNEG wrapping of this getNode is elided from this dump)
14305 return DAG.getNode(
14307 DAG.getNode(PreferredFusedOpcode, SL, VT,
14308 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14309 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14315 // fold (fsub (fneg (fpext (fmul, x, y))), z)
14316 // -> (fneg (fma (fpext x)), (fpext y), z)
14317 // Note: This could be removed with appropriate canonicalization of the
14318 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
14319 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
14320 // from implementing the canonicalization in visitFSUB.
14321 if (N0.getOpcode() == ISD::FNEG) {
14322 SDValue N00 = N0.getOperand(0);
14323 if (N00.getOpcode() == ISD::FP_EXTEND) {
14324 SDValue N000 = N00.getOperand(0);
14325 if (isContractableFMUL(N000) &&
14326 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14327 N000.getValueType())) {
14328 return DAG.getNode(
14330 DAG.getNode(PreferredFusedOpcode, SL, VT,
14331 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14332 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
// Reassociation permission: from a global option or the node's `reassoc` flag.
14338 auto isReassociable = [Options](SDNode *N) {
14339 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14342 auto isContractableAndReassociableFMUL = [isContractableFMUL,
14343 isReassociable](SDValue N) {
14344 return isContractableFMUL(N) && isReassociable(N.getNode());
14347 auto isFusedOp = [&](SDValue N) {
14348 unsigned Opcode = N.getOpcode();
14349 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
14352 // More folding opportunities when target permits.
14353 if (Aggressive && isReassociable(N)) {
14354 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
14355 // fold (fsub (fma x, y, (fmul u, v)), z)
14356 // -> (fma x, y (fma u, v, (fneg z)))
14357 if (CanFuse && isFusedOp(N0) &&
14358 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
14359 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
14360 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14362 DAG.getNode(PreferredFusedOpcode, SL, VT,
14363 N0.getOperand(2).getOperand(0),
14364 N0.getOperand(2).getOperand(1),
14365 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14368 // fold (fsub x, (fma y, z, (fmul u, v)))
14369 // -> (fma (fneg y), z, (fma (fneg u), v, x))
14370 if (CanFuse && isFusedOp(N1) &&
14371 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
14372 N1->hasOneUse() && NoSignedZero) {
14373 SDValue N20 = N1.getOperand(2).getOperand(0);
14374 SDValue N21 = N1.getOperand(2).getOperand(1);
14375 return DAG.getNode(
14376 PreferredFusedOpcode, SL, VT,
14377 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14378 DAG.getNode(PreferredFusedOpcode, SL, VT,
14379 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
14382 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
14383 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
14384 if (isFusedOp(N0) && N0->hasOneUse()) {
14385 SDValue N02 = N0.getOperand(2);
14386 if (N02.getOpcode() == ISD::FP_EXTEND) {
14387 SDValue N020 = N02.getOperand(0);
14388 if (isContractableAndReassociableFMUL(N020) &&
14389 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14390 N020.getValueType())) {
14391 return DAG.getNode(
14392 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
14394 PreferredFusedOpcode, SL, VT,
14395 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
14396 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
14397 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14402 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
14403 // -> (fma (fpext x), (fpext y),
14404 // (fma (fpext u), (fpext v), (fneg z)))
14405 // FIXME: This turns two single-precision and one double-precision
14406 // operation into two double-precision operations, which might not be
14407 // interesting for all targets, especially GPUs.
14408 if (N0.getOpcode() == ISD::FP_EXTEND) {
14409 SDValue N00 = N0.getOperand(0);
14410 if (isFusedOp(N00)) {
14411 SDValue N002 = N00.getOperand(2);
14412 if (isContractableAndReassociableFMUL(N002) &&
14413 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14414 N00.getValueType())) {
14415 return DAG.getNode(
14416 PreferredFusedOpcode, SL, VT,
14417 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14418 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14420 PreferredFusedOpcode, SL, VT,
14421 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
14422 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
14423 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14428 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
14429 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
// (the rest of this condition, e.g. hasOneUse checks, is elided from this dump)
14430 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
14432 SDValue N120 = N1.getOperand(2).getOperand(0);
14433 if (isContractableAndReassociableFMUL(N120) &&
14434 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14435 N120.getValueType())) {
14436 SDValue N1200 = N120.getOperand(0);
14437 SDValue N1201 = N120.getOperand(1);
14438 return DAG.getNode(
14439 PreferredFusedOpcode, SL, VT,
14440 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14441 DAG.getNode(PreferredFusedOpcode, SL, VT,
14442 DAG.getNode(ISD::FNEG, SL, VT,
14443 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
14444 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
14448 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
14449 // -> (fma (fneg (fpext y)), (fpext z),
14450 // (fma (fneg (fpext u)), (fpext v), x))
14451 // FIXME: This turns two single-precision and one double-precision
14452 // operation into two double-precision operations, which might not be
14453 // interesting for all targets, especially GPUs.
14454 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
14455 SDValue CvtSrc = N1.getOperand(0);
14456 SDValue N100 = CvtSrc.getOperand(0);
14457 SDValue N101 = CvtSrc.getOperand(1);
14458 SDValue N102 = CvtSrc.getOperand(2);
14459 if (isContractableAndReassociableFMUL(N102) &&
14460 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14461 CvtSrc.getValueType())) {
14462 SDValue N1020 = N102.getOperand(0);
14463 SDValue N1021 = N102.getOperand(1);
14464 return DAG.getNode(
14465 PreferredFusedOpcode, SL, VT,
14466 DAG.getNode(ISD::FNEG, SL, VT,
14467 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
14468 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
14469 DAG.getNode(PreferredFusedOpcode, SL, VT,
14470 DAG.getNode(ISD::FNEG, SL, VT,
14471 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
14472 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
// NOTE(review): this dump elides source lines (embedded line-number gaps):
// the declarations of `SL` and `HasFMA`, several early-return/fallthrough
// lines, lambda closers, some getNode trailing operands, and the function
// tail are missing. Code kept verbatim — verify against upstream.
14480 /// Try to perform FMA combining on a given FMUL node based on the distributive
14481 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
14482 /// subtraction instead of addition).
14483 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
14484 SDValue N0 = N->getOperand(0);
14485 SDValue N1 = N->getOperand(1);
14486 EVT VT = N->getValueType(0);
14489 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
14491 const TargetOptions &Options = DAG.getTarget().Options;
14493 // The transforms below are incorrect when x == 0 and y == inf, because the
14494 // intermediate multiplication produces a nan.
14495 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
14496 if (!hasNoInfs(Options, FAdd))
14499 // Floating-point multiply-add without intermediate rounding.
// (the `bool HasFMA =` declaration line is elided from this dump)
14501 isContractableFMUL(Options, SDValue(N, 0)) &&
14502 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14503 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14505 // Floating-point multiply-add with intermediate rounding. This can result
14506 // in a less precise result due to the changed rounding order.
14507 bool HasFMAD = Options.UnsafeFPMath &&
14508 (LegalOperations && TLI.isFMADLegal(DAG, N));
14510 // No valid opcode, do not combine.
14511 if (!HasFMAD && !HasFMA)
14514 // Always prefer FMAD to FMA for precision.
14515 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14516 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14518 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14519 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
14520 auto FuseFADD = [&](SDValue X, SDValue Y) {
14521 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
// isConstOrConstSplatFP with AllowUndefs=true matches scalar or splat FP consts.
14522 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14523 if (C->isExactlyValue(+1.0))
14524 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14526 if (C->isExactlyValue(-1.0))
14527 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14528 DAG.getNode(ISD::FNEG, SL, VT, Y));
// Try both operand orders, since FMUL is commutative.
14534 if (SDValue FMA = FuseFADD(N0, N1))
14536 if (SDValue FMA = FuseFADD(N1, N0))
14539 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14540 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14541 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14542 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
14543 auto FuseFSUB = [&](SDValue X, SDValue Y) {
14544 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14545 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14546 if (C0->isExactlyValue(+1.0))
14547 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14548 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14550 if (C0->isExactlyValue(-1.0))
14551 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14552 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14553 DAG.getNode(ISD::FNEG, SL, VT, Y));
14555 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14556 if (C1->isExactlyValue(+1.0))
14557 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14558 DAG.getNode(ISD::FNEG, SL, VT, Y));
14559 if (C1->isExactlyValue(-1.0))
14560 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
// Try both operand orders, since FMUL is commutative.
14567 if (SDValue FMA = FuseFSUB(N0, N1))
14569 if (SDValue FMA = FuseFSUB(N1, N0))
// Combine/simplify an ISD::FADD node. Returns the replacement value on a
// successful fold, otherwise falls through to the FMA combines at the end.
14575 SDValue DAGCombiner::visitFADD(SDNode *N) {
14576 SDValue N0 = N->getOperand(0);
14577 SDValue N1 = N->getOperand(1);
14578 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14579 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14580 EVT VT = N->getValueType(0);
14582 const TargetOptions &Options = DAG.getTarget().Options;
14583 SDNodeFlags Flags = N->getFlags();
// Nodes created below inherit N's fast-math flags for the lifetime of this
// inserter.
14584 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14586 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14589 // fold (fadd c1, c2) -> c1 + c2
14590 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14593 // canonicalize constant to RHS
14594 if (N0CFP && !N1CFP)
14595 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14599 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14602 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14603 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
14604 if (N1C && N1C->isZero())
14605 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
14608 if (SDValue NewSel = foldBinOpIntoSelect(N))
14611 // fold (fadd A, (fneg B)) -> (fsub A, B)
// Only if the negation of N1 is strictly cheaper than N1 itself.
14612 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14613 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14614 N1, DAG, LegalOperations, ForCodeSize))
14615 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14617 // fold (fadd (fneg A), B) -> (fsub B, A)
14618 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14619 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14620 N0, DAG, LegalOperations, ForCodeSize))
14621 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
// Matches a single-use (fmul B, -2.0); the -2.0 may be a splat constant.
14623 auto isFMulNegTwo = [](SDValue FMul) {
14624 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14626 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14627 return C && C->isExactlyValue(-2.0);
14630 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14631 if (isFMulNegTwo(N0)) {
14632 SDValue B = N0.getOperand(0);
14633 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14634 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14636 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14637 if (isFMulNegTwo(N1)) {
14638 SDValue B = N1.getOperand(0);
14639 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14640 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14643 // No FP constant should be created after legalization as Instruction
14644 // Selection pass has a hard time dealing with FP constants.
14645 bool AllowNewConst = (Level < AfterLegalizeDAG);
14647 // If nnan is enabled, fold lots of things.
14648 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14649 // If allowed, fold (fadd (fneg x), x) -> 0.0
14650 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14651 return DAG.getConstantFP(0.0, DL, VT);
14653 // If allowed, fold (fadd x, (fneg x)) -> 0.0
14654 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14655 return DAG.getConstantFP(0.0, DL, VT);
14658 // If 'unsafe math' or reassoc and nsz, fold lots of things.
14659 // TODO: break out portions of the transformations below for which Unsafe is
14660 // considered and which do not require both nsz and reassoc
14661 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14662 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14664 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14665 if (N1CFP && N0.getOpcode() == ISD::FADD &&
14666 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14667 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14668 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14671 // We can fold chains of FADD's of the same value into multiplications.
14672 // This transform is not safe in general because we are reducing the number
14673 // of rounding steps.
14674 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14675 if (N0.getOpcode() == ISD::FMUL) {
// CFP00/CFP01: whether the inner FMUL's operands are FP constants. The
// !CFP00 guards below avoid re-matching an already constant-folded form.
14676 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14677 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14679 // (fadd (fmul x, c), x) -> (fmul x, c+1)
14680 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14681 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14682 DAG.getConstantFP(1.0, DL, VT));
14683 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14686 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14687 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14688 N1.getOperand(0) == N1.getOperand(1) &&
14689 N0.getOperand(0) == N1.getOperand(0)) {
14690 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14691 DAG.getConstantFP(2.0, DL, VT));
14692 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
// Mirror of the N0 cases above with the FMUL on the right-hand side.
14696 if (N1.getOpcode() == ISD::FMUL) {
14697 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14698 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14700 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14701 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14702 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14703 DAG.getConstantFP(1.0, DL, VT));
14704 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14707 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14708 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14709 N0.getOperand(0) == N0.getOperand(1) &&
14710 N1.getOperand(0) == N0.getOperand(0)) {
14711 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14712 DAG.getConstantFP(2.0, DL, VT));
14713 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14717 if (N0.getOpcode() == ISD::FADD) {
14718 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14719 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14720 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14721 (N0.getOperand(0) == N1)) {
14722 return DAG.getNode(ISD::FMUL, DL, VT, N1,
14723 DAG.getConstantFP(3.0, DL, VT));
14727 if (N1.getOpcode() == ISD::FADD) {
14728 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14729 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14730 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14731 N1.getOperand(0) == N0) {
14732 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14733 DAG.getConstantFP(3.0, DL, VT));
14737 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14738 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14739 N0.getOperand(0) == N0.getOperand(1) &&
14740 N1.getOperand(0) == N1.getOperand(1) &&
14741 N0.getOperand(0) == N1.getOperand(0)) {
14742 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14743 DAG.getConstantFP(4.0, DL, VT));
14746 } // enable-unsafe-fp-math
14748 // FADD -> FMA combines:
14749 if (SDValue Fused = visitFADDForFMACombine(N)) {
14750 AddToWorklist(Fused.getNode());
// Combine an ISD::STRICT_FADD node. Strict FP nodes carry a chain (operand 0,
// result 1), so the fadd/fneg -> fsub folds below must thread Chain through
// and produce a two-result (VT, ChainVT) node.
14756 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14757 SDValue Chain = N->getOperand(0);
14758 SDValue N0 = N->getOperand(1);
14759 SDValue N1 = N->getOperand(2);
14760 EVT VT = N->getValueType(0);
14761 EVT ChainVT = N->getValueType(1);
14763 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14765 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
// Only when negating N1 is strictly cheaper than keeping it.
14766 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14767 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14768 N1, DAG, LegalOperations, ForCodeSize)) {
14769 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14770 {Chain, N0, NegN1});
14773 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14774 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14775 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14776 N0, DAG, LegalOperations, ForCodeSize)) {
14777 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14778 {Chain, N1, NegN0});
// Combine/simplify an ISD::FSUB node; falls through to the FSUB -> FMA
// combines at the end if no simpler fold applies.
14783 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14784 SDValue N0 = N->getOperand(0);
14785 SDValue N1 = N->getOperand(1);
14786 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14787 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14788 EVT VT = N->getValueType(0);
14790 const TargetOptions &Options = DAG.getTarget().Options;
14791 const SDNodeFlags Flags = N->getFlags();
14792 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14794 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14797 // fold (fsub c1, c2) -> c1-c2
14798 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14803 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14806 if (SDValue NewSel = foldBinOpIntoSelect(N))
14809 // (fsub A, 0) -> A
// Subtracting -0.0 is always exact; subtracting +0.0 additionally needs nsz
// because (-0.0) - (+0.0) is -0.0, not the original -0.0's identity.
14810 if (N1CFP && N1CFP->isZero()) {
14811 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14812 Flags.hasNoSignedZeros()) {
14818 // (fsub x, x) -> 0.0
// Requires nnan: Inf - Inf and NaN - NaN are NaN, not 0.0.
14819 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14820 return DAG.getConstantFP(0.0f, DL, VT);
14823 // (fsub -0.0, N1) -> -N1
14824 if (N0CFP && N0CFP->isZero()) {
14825 if (N0CFP->isNegative() ||
14826 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14827 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14828 // flushed to zero, unless all users treat denorms as zero (DAZ).
14829 // FIXME: This transform will change the sign of a NaN and the behavior
14830 // of a signaling NaN. It is only valid when a NoNaN flag is present.
14831 DenormalMode DenormMode = DAG.getDenormalMode(VT);
14832 if (DenormMode == DenormalMode::getIEEE()) {
14833 if (SDValue NegN1 =
14834 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14836 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14837 return DAG.getNode(ISD::FNEG, DL, VT, N1);
// Reassociation folds: need (unsafe && nsz options) or (reassoc && nsz flags).
14842 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14843 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14844 N1.getOpcode() == ISD::FADD) {
14845 // X - (X + Y) -> -Y
14846 if (N0 == N1->getOperand(0))
14847 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14848 // X - (Y + X) -> -Y
14849 if (N0 == N1->getOperand(1))
14850 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14853 // fold (fsub A, (fneg B)) -> (fadd A, B)
14854 if (SDValue NegN1 =
14855 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14856 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14858 // FSUB -> FMA combines:
14859 if (SDValue Fused = visitFSUBForFMACombine(N)) {
14860 AddToWorklist(Fused.getNode());
// Combine/simplify an ISD::FMUL node; falls through to the distributive
// FMUL -> FMA combine at the end.
14867 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14868 SDValue N0 = N->getOperand(0);
14869 SDValue N1 = N->getOperand(1);
14870 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14871 EVT VT = N->getValueType(0);
14873 const TargetOptions &Options = DAG.getTarget().Options;
14874 const SDNodeFlags Flags = N->getFlags();
14875 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14877 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14880 // fold (fmul c1, c2) -> c1*c2
14881 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14884 // canonicalize constant to RHS
14885 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14886 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14887 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14891 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14894 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Reassociation-dependent folds (change rounding behavior).
14897 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14898 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14899 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14900 N0.getOpcode() == ISD::FMUL) {
14901 SDValue N00 = N0.getOperand(0);
14902 SDValue N01 = N0.getOperand(1);
14903 // Avoid an infinite loop by making sure that N00 is not a constant
14904 // (the inner multiply has not been constant folded yet).
14905 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14906 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14907 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14908 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14912 // Match a special-case: we convert X * 2.0 into fadd.
14913 // fmul (fadd X, X), C -> fmul X, 2.0 * C
14914 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14915 N0.getOperand(0) == N0.getOperand(1)) {
14916 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14917 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14918 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14922 // fold (fmul X, 2.0) -> (fadd X, X)
14923 if (N1CFP && N1CFP->isExactlyValue(+2.0))
14924 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14926 // fold (fmul X, -1.0) -> (fsub -0.0, X)
14927 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14928 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14929 return DAG.getNode(ISD::FSUB, DL, VT,
14930 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14934 // -N0 * -N1 --> N0 * N1
// Only profitable when at least one negation is strictly cheaper; otherwise
// we would just be shuffling fnegs around.
14935 TargetLowering::NegatibleCost CostN0 =
14936 TargetLowering::NegatibleCost::Expensive;
14937 TargetLowering::NegatibleCost CostN1 =
14938 TargetLowering::NegatibleCost::Expensive;
14940 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14942 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14943 if (NegN0 && NegN1 &&
14944 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14945 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14946 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14948 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14949 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14950 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14951 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14952 TLI.isOperationLegal(ISD::FABS, VT)) {
// Normalize so Select holds the SELECT and X the other multiplicand.
14953 SDValue Select = N0, X = N1;
14954 if (Select.getOpcode() != ISD::SELECT)
14955 std::swap(Select, X);
14957 SDValue Cond = Select.getOperand(0);
14958 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14959 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
// Require a (setcc X, 0.0) condition comparing X against zero.
14961 if (TrueOpnd && FalseOpnd &&
14962 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14963 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14964 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14965 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// NOTE(review): the condition-code canonicalization cases are elided in
// this view; this swap flips true/false for the inverted comparisons.
14974 std::swap(TrueOpnd, FalseOpnd);
14982 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14983 TLI.isOperationLegal(ISD::FNEG, VT))
14984 return DAG.getNode(ISD::FNEG, DL, VT,
14985 DAG.getNode(ISD::FABS, DL, VT, X));
14986 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14987 return DAG.getNode(ISD::FABS, DL, VT, X);
14994 // FMUL -> FMA combines:
14995 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14996 AddToWorklist(Fused.getNode());
// Combine/simplify an ISD::FMA node (N0 * N1 + N2).
15003 SDValue DAGCombiner::visitFMA(SDNode *N) {
15004 SDValue N0 = N->getOperand(0);
15005 SDValue N1 = N->getOperand(1);
15006 SDValue N2 = N->getOperand(2);
// Scalar-only constant views (dyn_cast, not splat-aware) used by the folds
// below; the all-constant fold uses isa<> directly instead.
15007 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15008 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
15009 EVT VT = N->getValueType(0);
15011 const TargetOptions &Options = DAG.getTarget().Options;
15012 // FMA nodes have flags that propagate to the created nodes.
15013 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15015 bool CanReassociate =
15016 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15018 // Constant fold FMA.
// Re-emitting the node with all-constant operands lets getNode fold it.
15019 if (isa<ConstantFPSDNode>(N0) &&
15020 isa<ConstantFPSDNode>(N1) &&
15021 isa<ConstantFPSDNode>(N2)) {
15022 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
15025 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
15026 TargetLowering::NegatibleCost CostN0 =
15027 TargetLowering::NegatibleCost::Expensive;
15028 TargetLowering::NegatibleCost CostN1 =
15029 TargetLowering::NegatibleCost::Expensive;
15031 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15033 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15034 if (NegN0 && NegN1 &&
15035 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15036 CostN1 == TargetLowering::NegatibleCost::Cheaper))
15037 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
15039 // FIXME: use fast math flags instead of Options.UnsafeFPMath
15040 if (Options.UnsafeFPMath) {
// Zero multiplicand folds (the elided returns presumably yield N2).
15041 if (N0CFP && N0CFP->isZero())
15043 if (N1CFP && N1CFP->isZero())
// fma 1.0, X, Y / fma X, 1.0, Y -> fadd.
15047 if (N0CFP && N0CFP->isExactlyValue(1.0))
15048 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
15049 if (N1CFP && N1CFP->isExactlyValue(1.0))
15050 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
15052 // Canonicalize (fma c, x, y) -> (fma x, c, y)
15053 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15054 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15055 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
15057 if (CanReassociate) {
15058 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
15059 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
15060 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
15061 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
15062 return DAG.getNode(ISD::FMUL, DL, VT, N0,
15063 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
15066 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
15067 if (N0.getOpcode() == ISD::FMUL &&
15068 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
15069 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
15070 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
15071 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
15076 // (fma x, -1, y) -> (fadd (fneg x), y)
// NOTE(review): the guard establishing N1CFP != nullptr for this section is
// elided in this view.
15078 if (N1CFP->isExactlyValue(1.0))
15079 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
15081 if (N1CFP->isExactlyValue(-1.0) &&
15082 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
15083 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
15084 AddToWorklist(RHSNeg.getNode());
15085 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
15088 // fma (fneg x), K, y -> fma x -K, y
// Only if -K is materializable: ConstantFP is legal, or K's node has one use
// and K is not a legal FP immediate anyway.
15089 if (N0.getOpcode() == ISD::FNEG &&
15090 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15091 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
15093 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
15094 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
15098 if (CanReassociate) {
15099 // (fma x, c, x) -> (fmul x, (c+1))
15100 if (N1CFP && N0 == N2) {
15101 return DAG.getNode(
15102 ISD::FMUL, DL, VT, N0,
15103 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
15106 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
15107 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
15108 return DAG.getNode(
15109 ISD::FMUL, DL, VT, N0,
15110 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
15114 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
15115 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
// Skip when FNEG is free: the negated form would not be cheaper.
15116 if (!TLI.isFNegFree(VT))
15117 if (SDValue Neg = TLI.getCheaperNegatedExpression(
15118 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
15119 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
15123 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
15125 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
15126 // Notice that this is not always beneficial. One reason is different targets
15127 // may have different costs for FDIV and FMUL, so sometimes the cost of two
15128 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
15129 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
15130 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
15131 // TODO: Limit this transform based on optsize/minsize - it always creates at
15132 // least 1 extra instruction. But the perf win may be substantial enough
15133 // that only minsize should restrict this.
15134 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
15135 const SDNodeFlags Flags = N->getFlags();
// Requires global unsafe math or the arcp (allow-reciprocal) flag; never
// after full DAG legalization.
15136 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
15139 // Skip if current node is a reciprocal/fneg-reciprocal.
15140 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
15141 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
15142 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
15145 // Exit early if the target does not want this transform or if there can't
15146 // possibly be enough uses of the divisor to make the transform worthwhile.
15147 unsigned MinUses = TLI.combineRepeatedFPDivisors();
15149 // For splat vectors, scale the number of uses by the splat factor. If we can
15150 // convert the division into a scalar op, that will likely be much faster.
15151 unsigned NumElts = 1;
15152 EVT VT = N->getValueType(0);
15153 if (VT.isVector() && DAG.isSplatValue(N1))
15154 NumElts = VT.getVectorMinNumElements();
15156 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
15159 // Find all FDIV users of the same divisor.
15160 // Use a set because duplicates may be present in the user list.
15161 SetVector<SDNode *> Users;
15162 for (auto *U : N1->uses()) {
15163 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
15164 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
15165 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
15166 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
15167 U->getFlags().hasAllowReassociation() &&
15168 U->getFlags().hasNoSignedZeros())
15171 // This division is eligible for optimization only if global unsafe math
15172 // is enabled or if this division allows reciprocal formation.
15173 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
15178 // Now that we have the actual number of divisor uses, make sure it meets
15179 // the minimum threshold specified by the target.
15180 if ((Users.size() * NumElts) < MinUses)
// Build the shared reciprocal once, then rewrite every eligible user.
15184 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15185 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
15187 // Dividend / Divisor -> Dividend * Reciprocal
15188 for (auto *U : Users) {
15189 SDValue Dividend = U->getOperand(0);
15190 if (Dividend != FPOne) {
15191 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
15192 Reciprocal, Flags);
15193 CombineTo(U, NewNode);
15194 } else if (U != Reciprocal.getNode()) {
15195 // In the absence of fast-math-flags, this user node is always the
15196 // same node as Reciprocal, but with FMF they may be different nodes.
15197 CombineTo(U, Reciprocal);
15200 return SDValue(N, 0); // N was replaced.
// Combine/simplify an ISD::FDIV node, including reciprocal and
// reciprocal-square-root estimate formation.
15203 SDValue DAGCombiner::visitFDIV(SDNode *N) {
15204 SDValue N0 = N->getOperand(0);
15205 SDValue N1 = N->getOperand(1);
15206 EVT VT = N->getValueType(0);
15208 const TargetOptions &Options = DAG.getTarget().Options;
15209 SDNodeFlags Flags = N->getFlags();
15210 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15212 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15215 // fold (fdiv c1, c2) -> c1/c2
15216 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
15221 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15224 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Share one reciprocal among all FDIV users of the same divisor.
15227 if (SDValue V = combineRepeatedFPDivisors(N))
15230 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
15231 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
15232 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
15233 // Compute the reciprocal 1.0 / c2.
15234 const APFloat &N1APF = N1CFP->getValueAPF();
15235 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
15236 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
15237 // Only do the transform if the reciprocal is a legal fp immediate that
15238 // isn't too nasty (eg NaN, denormal, ...).
15239 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
15240 (!LegalOperations ||
15241 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
15242 // backend)... we should handle this gracefully after Legalize.
15243 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
15244 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15245 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
15246 return DAG.getNode(ISD::FMUL, DL, VT, N0,
15247 DAG.getConstantFP(Recip, DL, VT));
15250 // If this FDIV is part of a reciprocal square root, it may be folded
15251 // into a target-specific square root estimate instruction.
15252 if (N1.getOpcode() == ISD::FSQRT) {
15253 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
15254 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
// Also look through an fp_extend/fp_round around the sqrt, re-applying the
// cast to the estimate.
15255 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
15256 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15258 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15259 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
15260 AddToWorklist(RV.getNode());
15261 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15263 } else if (N1.getOpcode() == ISD::FP_ROUND &&
15264 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15266 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15267 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
15268 AddToWorklist(RV.getNode());
15269 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15271 } else if (N1.getOpcode() == ISD::FMUL) {
15272 // Look through an FMUL. Even though this won't remove the FDIV directly,
15273 // it's still worthwhile to get rid of the FSQRT if possible.
// Find the FSQRT on either side of the FMUL; Y is the other factor.
15275 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15276 Sqrt = N1.getOperand(0);
15277 Y = N1.getOperand(1);
15278 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
15279 Sqrt = N1.getOperand(1);
15280 Y = N1.getOperand(0);
15282 if (Sqrt.getNode()) {
15283 // If the other multiply operand is known positive, pull it into the
15284 // sqrt. That will eliminate the division if we convert to an estimate.
15285 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
15286 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
// A is known non-negative: either Y = fabs(A), or Y equals the sqrt
// operand itself (A = sqrt-arg, and A*A >= 0).
15288 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
15289 A = Y.getOperand(0);
15290 else if (Y == Sqrt.getOperand(0))
15293 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
15294 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
15295 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
15297 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
15298 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
15299 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
15301 // Estimate creation failed. Clean up speculatively created nodes.
15302 recursivelyDeleteUnusedNodes(AAZ.getNode());
15306 // We found a FSQRT, so try to make this fold:
15307 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
15308 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
15309 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
15310 AddToWorklist(Div.getNode());
15311 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
15316 // Fold into a reciprocal estimate and multiply instead of a real divide.
15317 if (Options.NoInfsFPMath || Flags.hasNoInfs())
15318 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
15322 // Fold X/Sqrt(X) -> Sqrt(X)
15323 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
15324 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
15325 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
15328 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
15329 TargetLowering::NegatibleCost CostN0 =
15330 TargetLowering::NegatibleCost::Expensive;
15331 TargetLowering::NegatibleCost CostN1 =
15332 TargetLowering::NegatibleCost::Expensive;
15334 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15336 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15337 if (NegN0 && NegN1 &&
15338 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15339 CostN1 == TargetLowering::NegatibleCost::Cheaper))
15340 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
// Combine/simplify an ISD::FREM node: only generic binop simplification,
// constant folding, and the select fold apply here.
15345 SDValue DAGCombiner::visitFREM(SDNode *N) {
15346 SDValue N0 = N->getOperand(0);
15347 SDValue N1 = N->getOperand(1);
15348 EVT VT = N->getValueType(0);
15349 SDNodeFlags Flags = N->getFlags();
15350 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15352 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15355 // fold (frem c1, c2) -> fmod(c1,c2)
15356 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
15359 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Replace an ISD::FSQRT with a target square-root estimate when the
// approximate-funcs and no-infs fast-math conditions allow it.
15365 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
15366 SDNodeFlags Flags = N->getFlags();
15367 const TargetOptions &Options = DAG.getTarget().Options;
15369 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
15370 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
15371 if (!Flags.hasApproximateFuncs() ||
15372 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
// Don't bother estimating if the target computes sqrt cheaply anyway.
15375 SDValue N0 = N->getOperand(0);
15376 if (TLI.isFsqrtCheap(N0, DAG))
15379 // FSQRT nodes have flags that propagate to the created nodes.
15380 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
15381 // transform the fdiv, we may produce a sub-optimal estimate sequence
15382 // because the reciprocal calculation may not have to filter out a
15384 return buildSqrtEstimate(N0, Flags);
15387 /// copysign(x, fp_extend(y)) -> copysign(x, y)
15388 /// copysign(x, fp_round(y)) -> copysign(x, y)
// Returns true when an FP cast feeding a FCOPYSIGN sign operand can be
// looked through (only the sign bit of operand 1 matters).
15389 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
15390 SDValue N1 = N->getOperand(1);
15391 if ((N1.getOpcode() == ISD::FP_EXTEND ||
15392 N1.getOpcode() == ISD::FP_ROUND)) {
15393 EVT N1VT = N1->getValueType(0);
15394 EVT N1Op0VT = N1->getOperand(0).getValueType();
15396 // Always fold no-op FP casts.
15397 if (N1VT == N1Op0VT)
15400 // Do not optimize out type conversion of f128 type yet.
15401 // For some targets like x86_64, configuration is changed to keep one f128
15402 // value in one SSE register, but instruction selection cannot handle
15403 // FCOPYSIGN on SSE registers yet.
15404 if (N1Op0VT == MVT::f128)
15407 // Avoid mismatched vector operand types, for better instruction selection.
15408 if (N1Op0VT.isVector())
// Combine/simplify an ISD::FCOPYSIGN node. Operand 0 supplies the magnitude,
// operand 1 only its sign bit; that asymmetry drives every fold below.
15416 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
15417 SDValue N0 = N->getOperand(0);
15418 SDValue N1 = N->getOperand(1);
15419 EVT VT = N->getValueType(0);
15421 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
15423 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
15426 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
15427 const APFloat &V = N1C->getValueAPF();
15428 // copysign(x, c1) -> fabs(x) iff ispos(c1)
15429 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
15430 if (!V.isNegative()) {
15431 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
15432 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15434 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
15435 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
15436 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
15440 // copysign(fabs(x), y) -> copysign(x, y)
15441 // copysign(fneg(x), y) -> copysign(x, y)
15442 // copysign(copysign(x,z), y) -> copysign(x, y)
// The sign of the magnitude operand is overwritten anyway, so sign-only
// wrappers on N0 can be stripped.
15443 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
15444 N0.getOpcode() == ISD::FCOPYSIGN)
15445 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
15447 // copysign(x, abs(y)) -> abs(x)
15448 if (N1.getOpcode() == ISD::FABS)
15449 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15451 // copysign(x, copysign(y,z)) -> copysign(x, z)
15452 if (N1.getOpcode() == ISD::FCOPYSIGN)
15453 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
15455 // copysign(x, fp_extend(y)) -> copysign(x, y)
15456 // copysign(x, fp_round(y)) -> copysign(x, y)
15457 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
15458 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
// Combine FPOW with a constant exponent: x**(1/3) -> FCBRT, and
// x**0.25 / x**0.75 -> sqrt-based expansions, under the fast-math flags
// documented at each transform.
15463 SDValue DAGCombiner::visitFPOW(SDNode *N) {
15464 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
15467 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15469 // Try to convert x ** (1/3) into cube root.
15470 // TODO: Handle the various flavors of long double.
15471 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
15472 // Some range near 1/3 should be fine.
15473 EVT VT = N->getValueType(0);
15474 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
15475 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
15476 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
15477 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
15478 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
15479 // For regular numbers, rounding may cause the results to differ.
15480 // Therefore, we require { nsz ninf nnan afn } for this transform.
15481 // TODO: We could select out the special cases if we don't have nsz/ninf.
15482 SDNodeFlags Flags = N->getFlags();
15483 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
15484 !Flags.hasApproximateFuncs())
15487 // Do not create a cbrt() libcall if the target does not have it, and do not
15488 // turn a pow that has lowering support into a cbrt() libcall.
15489 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
15490 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
15491 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
15494 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
15497 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
15498 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
15499 // TODO: This could be extended (using a target hook) to handle smaller
15500 // power-of-2 fractional exponents.
15501 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
15502 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
15503 if (ExponentIs025 || ExponentIs075) {
15504 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15505 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
15506 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15507 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
15508 // For regular numbers, rounding may cause the results to differ.
15509 // Therefore, we require { nsz ninf afn } for this transform.
15510 // TODO: We could select out the special cases if we don't have nsz/ninf.
15511 SDNodeFlags Flags = N->getFlags();
15513 // We only need no signed zeros for the 0.25 case.
15514 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15515 !Flags.hasApproximateFuncs())
15518 // Don't double the number of libcalls. We are trying to inline fast code.
15519 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
15522 // Assume that libcalls are the smallest code.
15523 // TODO: This restriction should probably be lifted for vectors.
15527 // pow(X, 0.25) --> sqrt(sqrt(X))
15529 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15530 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15533 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15534 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
// Fold [us]itofp(fpto[us]i X) --> ftrunc X when the round-trip through the
// integer type is equivalent to truncation toward zero.
15540 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
15541 const TargetLowering &TLI) {
15542 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15543 // replacing casts with a libcall. We also must be allowed to ignore -0.0
15544 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15545 // conversions would return +0.0.
15546 // FIXME: We should be able to use node-level FMF here.
15547 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15548 EVT VT = N->getValueType(0);
15549 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15550 !DAG.getTarget().Options.NoSignedZerosFPMath)
15553 // fptosi/fptoui round towards zero, so converting from FP to integer and
15554 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
// Signed and unsigned variants are matched separately; both require that
// the inner conversion started from the same FP type we are producing.
15555 SDValue N0 = N->getOperand(0);
15556 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15557 N0.getOperand(0).getValueType() == VT)
15558 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15560 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15561 N0.getOperand(0).getValueType() == VT)
15562 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
// Combine SINT_TO_FP: constant-fold, convert to UINT_TO_FP when the sign bit
// is known zero, turn setcc-based inputs into FP selects, and fold the
// int<->FP round trip via foldFPToIntToFP.
15567 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15568 SDValue N0 = N->getOperand(0);
15569 EVT VT = N->getValueType(0);
15570 EVT OpVT = N0.getValueType();
15572 // [us]itofp(undef) = 0, because the result value is bounded.
15574 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15576 // fold (sint_to_fp c1) -> c1fp
15577 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15578 // ...but only if the target supports immediate floating-point values
15579 (!LegalOperations ||
15580 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15581 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15583 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15584 // but UINT_TO_FP is legal on this target, try to convert.
15585 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15586 hasOperation(ISD::UINT_TO_FP, OpVT)) {
15587 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15588 if (DAG.SignBitIsZero(N0))
15589 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15592 // The next optimizations are desirable only if SELECT_CC can be lowered.
// sint_to_fp of an i1 setcc yields -1.0 (true, i.e. sign-extended 1) or 0.0.
15593 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15594 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15596 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15598 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15599 DAG.getConstantFP(0.0, DL, VT));
// A zero-extended setcc yields 0 or 1, hence 1.0/0.0 after the conversion.
15602 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15603 // (select (setcc x, y, cc), 1.0, 0.0)
15604 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15605 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15606 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15608 return DAG.getSelect(DL, VT, N0.getOperand(0),
15609 DAG.getConstantFP(1.0, DL, VT),
15610 DAG.getConstantFP(0.0, DL, VT));
15613 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
// Combine UINT_TO_FP: mirror of visitSINT_TO_FP — constant-fold, convert to
// SINT_TO_FP when the sign bit is known zero, fold setcc inputs to FP
// selects, and fold the int<->FP round trip.
15619 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15620 SDValue N0 = N->getOperand(0);
15621 EVT VT = N->getValueType(0);
15622 EVT OpVT = N0.getValueType();
15624 // [us]itofp(undef) = 0, because the result value is bounded.
15626 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15628 // fold (uint_to_fp c1) -> c1fp
15629 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15630 // ...but only if the target supports immediate floating-point values
15631 (!LegalOperations ||
15632 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15633 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15635 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15636 // but SINT_TO_FP is legal on this target, try to convert.
15637 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15638 hasOperation(ISD::SINT_TO_FP, OpVT)) {
15639 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15640 if (DAG.SignBitIsZero(N0))
15641 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// Unsigned conversion of an i1/boolean setcc is 1.0 when true, 0.0 otherwise.
15644 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15645 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15646 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15648 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15649 DAG.getConstantFP(0.0, DL, VT));
15652 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15658 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// The FP middle-man can be removed whenever the integer range that survives
// the first conversion is exactly representable in the FP type.
15659 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15660 SDValue N0 = N->getOperand(0);
15661 EVT VT = N->getValueType(0);
15663 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15666 SDValue Src = N0.getOperand(0);
15667 EVT SrcVT = Src.getValueType();
15668 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15669 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15671 // We can safely assume the conversion won't overflow the output range,
15672 // because (for example) (uint8_t)18293.f is undefined behavior.
15674 // Since we can assume the conversion won't overflow, our decision as to
15675 // whether the input will fit in the float should depend on the minimum
15676 // of the input range and output range.
15678 // This means this is also safe for a signed input and unsigned output, since
15679 // a negative input would lead to undefined behavior.
// A signed input loses one magnitude bit to the sign, hence the subtraction.
15680 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15681 unsigned OutputSize = (int)VT.getScalarSizeInBits();
15682 unsigned ActualSize = std::min(InputSize, OutputSize);
15683 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15685 // We can only fold away the float conversion if the input range can be
15686 // represented exactly in the float range.
// The FP type's significand precision must cover every integer up to
// ActualSize bits; otherwise the round trip is lossy and we keep the node.
15687 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15688 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15689 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15690 : ISD::ZERO_EXTEND;
15691 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15693 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15694 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
// Same width: a bitcast suffices (handles e.g. signedness-only changes).
15695 return DAG.getBitcast(VT, Src);
// Combine FP_TO_SINT: propagate undef, constant-fold, then try to remove an
// int->FP->int round trip via FoldIntToFPToInt.
15700 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15701 SDValue N0 = N->getOperand(0);
15702 EVT VT = N->getValueType(0);
15704 // fold (fp_to_sint undef) -> undef
15706 return DAG.getUNDEF(VT);
15708 // fold (fp_to_sint c1fp) -> c1
15709 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15710 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15712 return FoldIntToFPToInt(N, DAG);
// Combine FP_TO_UINT: unsigned counterpart of visitFP_TO_SINT — propagate
// undef, constant-fold, then try FoldIntToFPToInt.
15715 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15716 SDValue N0 = N->getOperand(0);
15717 EVT VT = N->getValueType(0);
15719 // fold (fp_to_uint undef) -> undef
15721 return DAG.getUNDEF(VT);
15723 // fold (fp_to_uint c1fp) -> c1
15724 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15725 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15727 return FoldIntToFPToInt(N, DAG);
// Combine FP_ROUND: cancel against FP_EXTEND, collapse double rounding when
// safe, and sink the round through FCOPYSIGN. Operand 1 is the standard
// FP_ROUND "truncating" flag (1 = value-preserving truncation).
15730 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15731 SDValue N0 = N->getOperand(0);
15732 SDValue N1 = N->getOperand(1);
15733 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15734 EVT VT = N->getValueType(0);
15736 // fold (fp_round c1fp) -> c1fp
15738 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15740 // fold (fp_round (fp_extend x)) -> x
15741 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15742 return N0.getOperand(0);
15744 // fold (fp_round (fp_round x)) -> (fp_round x)
15745 if (N0.getOpcode() == ISD::FP_ROUND) {
15746 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15747 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15749 // Skip this folding if it results in an fp_round from f80 to f16.
15751 // f80 to f16 always generates an expensive (and as yet, unimplemented)
15752 // libcall to __truncxfhf2 instead of selecting native f16 conversion
15753 // instructions from f32 or f64. Moreover, the first (value-preserving)
15754 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15756 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15759 // If the first fp_round isn't a value preserving truncation, it might
15760 // introduce a tie in the second fp_round, that wouldn't occur in the
15761 // single-step fp_round we want to fold to.
15762 // In other words, double rounding isn't the same as rounding.
15763 // Also, this is a value preserving truncation iff both fp_round's are.
15764 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
// The merged node is "truncating" only when both original rounds were.
15766 return DAG.getNode(
15767 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15768 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
// Rounding only the magnitude operand is fine: the sign comes from Y.
15772 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15773 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
15774 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15775 N0.getOperand(0), N1);
15776 AddToWorklist(Tmp.getNode());
15777 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15778 Tmp, N0.getOperand(1));
15781 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
// Combine FP_EXTEND: constant-fold, cancel against value-preserving
// FP_ROUND, merge with FP16_TO_FP, and widen FP loads into extending loads.
15787 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15788 SDValue N0 = N->getOperand(0);
15789 EVT VT = N->getValueType(0);
15791 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
// Deferring here lets the FP_ROUND combine collapse the pair instead.
15792 if (N->hasOneUse() &&
15793 N->use_begin()->getOpcode() == ISD::FP_ROUND)
15796 // fold (fp_extend c1fp) -> c1fp
15797 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15798 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15800 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15801 if (N0.getOpcode() == ISD::FP16_TO_FP &&
15802 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15803 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15805 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// A trunc-flag of 1 marks the inner round value-preserving, so X can be
// reused directly, re-rounded to a narrower VT, or re-extended as needed.
15807 if (N0.getOpcode() == ISD::FP_ROUND
15808 && N0.getConstantOperandVal(1) == 1) {
15809 SDValue In = N0.getOperand(0);
15810 if (In.getValueType() == VT) return In;
15811 if (VT.bitsLT(In.getValueType()))
15812 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15813 In, N0.getOperand(1));
15814 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15817 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
// Replace the narrow load + extend with a single extending load; the old
// load's value users are redirected to a round of the extended value, and
// its chain users to the new load's chain.
15818 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15819 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15820 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15821 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15823 LN0->getBasePtr(), N0.getValueType(),
15824 LN0->getMemOperand());
15825 CombineTo(N, ExtLoad);
15828 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
15829 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
15830 ExtLoad.getValue(1));
15831 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15834 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
// Combine FCEIL: constant-fold only.
15840 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15841 SDValue N0 = N->getOperand(0);
15842 EVT VT = N->getValueType(0);
15844 // fold (fceil c1) -> fceil(c1)
15845 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15846 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
// Combine FTRUNC: constant-fold, and drop the truncation when the operand
// is already a rounded-to-integer value (e.g. FNEARBYINT and related ops).
15851 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15852 SDValue N0 = N->getOperand(0);
15853 EVT VT = N->getValueType(0);
15855 // fold (ftrunc c1) -> ftrunc(c1)
15856 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15857 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15859 // fold ftrunc (known rounded int x) -> x
15860 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
15861 // likely to be generated to extract integer from a rounded floating value.
15862 switch (N0.getOpcode()) {
15866 case ISD::FNEARBYINT:
// Combine FFLOOR: constant-fold only.
15875 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15876 SDValue N0 = N->getOperand(0);
15877 EVT VT = N->getValueType(0);
15879 // fold (ffloor c1) -> ffloor(c1)
15880 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15881 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
// Combine FNEG: constant-fold, push the negation into the operand via
// TLI.getNegatedExpression, and fold -(X-Y) -> (Y-X) when signed zeros can
// be ignored.
15886 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15887 SDValue N0 = N->getOperand(0);
15888 EVT VT = N->getValueType(0);
15889 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15891 // Constant fold FNEG.
15892 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15893 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
// Let the target fold the negation into the expression when it is free or
// cheaper than an explicit FNEG.
15895 if (SDValue NegN0 =
15896 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15899 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15900 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15901 // know it was called from a context with a nsz flag if the input fsub does
// Requires the global no-signed-zeros option or the node-level nsz flag,
// and a single-use FSUB so no other user observes the original value.
15903 if (N0.getOpcode() == ISD::FSUB &&
15904 (DAG.getTarget().Options.NoSignedZerosFPMath ||
15905 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15906 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15910 if (SDValue Cast = foldSignChangeInBitcast(N))
// Combine the four FP min/max opcodes (FMINNUM/FMAXNUM, which ignore NaNs,
// and FMINIMUM/FMAXIMUM, which propagate them): constant-fold, canonicalize
// the constant to the RHS, and simplify against NaN and +/-inf constants.
15916 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15917 SDValue N0 = N->getOperand(0);
15918 SDValue N1 = N->getOperand(1);
15919 EVT VT = N->getValueType(0);
15920 const SDNodeFlags Flags = N->getFlags();
15921 unsigned Opc = N->getOpcode();
// PropagatesNaN distinguishes the IEEE-754 minimum/maximum family from the
// minnum/maxnum family; IsMin selects the "smaller wins" direction.
15922 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15923 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15924 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15927 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15930 // Canonicalize to constant on RHS.
15931 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15932 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15933 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15935 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15936 const APFloat &AF = N1CFP->getValueAPF();
15938 // minnum(X, nan) -> X
15939 // maxnum(X, nan) -> X
15940 // minimum(X, nan) -> nan
15941 // maximum(X, nan) -> nan
15943 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15945 // In the following folds, inf can be replaced with the largest finite
15946 // float, if the ninf flag is set.
15947 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
// An infinity on the "winning" side dominates the result; the nnan
// requirement for the propagating variants guards against X being NaN.
15948 // minnum(X, -inf) -> -inf
15949 // maxnum(X, +inf) -> +inf
15950 // minimum(X, -inf) -> -inf if nnan
15951 // maximum(X, +inf) -> +inf if nnan
15952 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15953 return N->getOperand(1);
15955 // minnum(X, +inf) -> X if nnan
15956 // maxnum(X, -inf) -> X if nnan
15957 // minimum(X, +inf) -> X
15958 // maximum(X, -inf) -> X
15959 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15960 return N->getOperand(0);
// Combine FABS: constant-fold, collapse nested fabs, and strip sign-only
// operations (fneg/fcopysign) whose effect fabs overwrites anyway.
15967 SDValue DAGCombiner::visitFABS(SDNode *N) {
15968 SDValue N0 = N->getOperand(0);
15969 EVT VT = N->getValueType(0);
15971 // fold (fabs c1) -> fabs(c1)
15972 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15973 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15975 // fold (fabs (fabs x)) -> (fabs x)
15976 if (N0.getOpcode() == ISD::FABS)
15977 return N->getOperand(0);
15979 // fold (fabs (fneg x)) -> (fabs x)
15980 // fold (fabs (fcopysign x, y)) -> (fabs x)
15981 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15982 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15984 if (SDValue Cast = foldSignChangeInBitcast(N))
// Combine BRCOND(chain, cond, dest): look through FREEZE on the condition,
// fold a SETCC condition into BR_CC where legal, and otherwise try to
// rebuild a SETCC from srl/xor patterns via rebuildSetCC.
15990 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15991 SDValue Chain = N->getOperand(0);
15992 SDValue N1 = N->getOperand(1);
15993 SDValue N2 = N->getOperand(2);
15995 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15996 // nondeterministic jumps).
15997 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15998 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15999 N1->getOperand(0), N2);
16002 // If N is a constant we could fold this into a fallthrough or unconditional
16003 // branch. However that doesn't happen very often in normal code, because
16004 // Instcombine/SimplifyCFG should have handled the available opportunities.
16005 // If we did this folding here, it would be necessary to update the
16006 // MachineBasicBlock CFG, which is awkward.
16008 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// BR_CC operand order: Chain, CondCode, LHS, RHS, DestBB — the setcc's
// condition code is operand 2 of the SETCC node.
16010 if (N1.getOpcode() == ISD::SETCC &&
16011 TLI.isOperationLegalOrCustom(ISD::BR_CC,
16012 N1.getOperand(0).getValueType())) {
16013 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
16014 Chain, N1.getOperand(2),
16015 N1.getOperand(0), N1.getOperand(1), N2);
16018 if (N1.hasOneUse()) {
16019 // rebuildSetCC calls visitXor which may change the Chain when there is a
16020 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
16021 HandleSDNode ChainHandle(Chain);
16022 if (SDValue NewN1 = rebuildSetCC(N1))
16023 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
16024 ChainHandle.getValue(), NewN1, N2);
// Try to rewrite a non-SETCC boolean value N (as used by BRCOND) into an
// explicit SETCC. Handles two shapes: (srl (and X, pow2), log2(pow2)) ->
// setcc ne, and xor-based comparisons -> setcc eq/ne. Returns a null
// SDValue when no rewrite applies.
16030 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
16031 if (N.getOpcode() == ISD::SRL ||
16032 (N.getOpcode() == ISD::TRUNCATE &&
16033 (N.getOperand(0).hasOneUse() &&
16034 N.getOperand(0).getOpcode() == ISD::SRL))) {
16035 // Look pass the truncate.
16036 if (N.getOpcode() == ISD::TRUNCATE)
16037 N = N.getOperand(0);
16039 // Match this pattern so that we can generate simpler code:
16042 // %b = and i32 %a, 2
16043 // %c = srl i32 %b, 1
16044 // brcond i32 %c ...
16049 // %b = and i32 %a, 2
16050 // %c = setcc eq %b, 0
16053 // This applies only when the AND constant value has one bit set and the
16054 // SRL constant is equal to the log2 of the AND constant. The back-end is
16055 // smart enough to convert the result into a TEST/JMP sequence.
16056 SDValue Op0 = N.getOperand(0);
16057 SDValue Op1 = N.getOperand(1);
16059 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
16060 SDValue AndOp1 = Op0.getOperand(1);
16062 if (AndOp1.getOpcode() == ISD::Constant) {
16063 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
16065 if (AndConst.isPowerOf2() &&
16066 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
16068 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
16069 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
16076 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
16077 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
16078 if (N.getOpcode() == ISD::XOR) {
16079 // Because we may call this on a speculatively constructed
16080 // SimplifiedSetCC Node, we need to simplify this node first.
16081 // Ideally this should be folded into SimplifySetCC and not
16082 // here. For now, grab a handle to N so we don't lose it from
16083 // replacements interal to the visit.
16084 HandleSDNode XORHandle(N);
// Iterate: visitXOR may simplify the node (possibly replacing N in-place,
// in which case the handle recovers the live value) until it is no longer
// an XOR or no further simplification is possible.
16085 while (N.getOpcode() == ISD::XOR) {
16086 SDValue Tmp = visitXOR(N.getNode());
16087 // No simplification done.
16088 if (!Tmp.getNode())
16090 // Returning N is form in-visit replacement that may invalidated
16091 // N. Grab value from Handle.
16092 if (Tmp.getNode() == N.getNode())
16093 N = XORHandle.getValue()
16094 else // Node simplified. Try simplifying again.
16098 if (N.getOpcode() != ISD::XOR)
16101 SDValue Op0 = N->getOperand(0);
16102 SDValue Op1 = N->getOperand(1);
// Only rebuild when neither side is already a SETCC; a bitwise-not wrapper
// (xor ..., -1) flips the comparison to equality.
16104 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
16105 bool Equal = false;
16106 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
16107 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
16108 Op0.getValueType() == MVT::i1) {
16110 Op0 = N->getOperand(0);
16111 Op1 = N->getOperand(1);
16115 EVT SetCCVT = N.getValueType();
16117 SetCCVT = getSetCCResultType(SetCCVT);
16118 // Replace the uses of XOR with SETCC
16119 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
16120 Equal ? ISD::SETEQ : ISD::SETNE);
16127 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
// Combine BR_CC by running SimplifySetCC on its comparison; if that yields
// a (possibly simpler) SETCC, rebuild the BR_CC around it.
16129 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
16130 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
16131 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
16133 // If N is a constant we could fold this into a fallthrough or unconditional
16134 // branch. However that doesn't happen very often in normal code, because
16135 // Instcombine/SimplifyCFG should have handled the available opportunities.
16136 // If we did this folding here, it would be necessary to update the
16137 // MachineBasicBlock CFG, which is awkward.
16139 // Use SimplifySetCC to simplify SETCC's.
16140 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
16141 CondLHS, CondRHS, CC->get(), SDLoc(N),
16143 if (Simp.getNode()) AddToWorklist(Simp.getNode());
16145 // fold to a simpler setcc
16146 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
16147 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
16148 N->getOperand(0), Simp.getOperand(2),
16149 Simp.getOperand(0), Simp.getOperand(1),
// Classify N as one of the four indexable memory ops (load, store, masked
// load, masked store), check it is not already indexed and that the target
// supports the Inc or Dec indexed form for its memory VT, and extract the
// base pointer. Outputs: IsLoad, IsMasked, Ptr. Returns false if N cannot
// be turned into an indexed access.
16155 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
16156 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
16157 const TargetLowering &TLI) {
16158 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16159 if (LD->isIndexed())
16161 EVT VT = LD->getMemoryVT();
16162 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
16164 Ptr = LD->getBasePtr();
16165 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16166 if (ST->isIndexed())
16168 EVT VT = ST->getMemoryVT();
16169 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
16171 Ptr = ST->getBasePtr();
16173 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
16174 if (LD->isIndexed())
16176 EVT VT = LD->getMemoryVT();
16177 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
16178 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
16180 Ptr = LD->getBasePtr();
16182 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
16183 if (ST->isIndexed())
16185 EVT VT = ST->getMemoryVT();
16186 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
16187 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
16189 Ptr = ST->getBasePtr();
16198 /// Try turning a load/store into a pre-indexed load/store when the base
16199 /// pointer is an add or subtract and it has other uses besides the load/store.
16200 /// After the transformation, the new indexed load/store has effectively folded
16201 /// the add/subtract in and all of its other uses are redirected to the
16202 /// new load/store.
16203 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
16204 if (Level < AfterLegalizeDAG)
16207 bool IsLoad = true;
16208 bool IsMasked = false;
16210 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
16214 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
16215 // out. There is no reason to make this a preinc/predec.
16216 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
16220 // Ask the target to do addressing mode selection.
16223 ISD::MemIndexedMode AM = ISD::UNINDEXED;
16224 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
16227 // Backends without true r+i pre-indexed forms may need to pass a
16228 // constant base with a variable offset so that constant coercion
16229 // will work with the patterns in canonical form.
16230 bool Swapped = false;
16231 if (isa<ConstantSDNode>(BasePtr)) {
16232 std::swap(BasePtr, Offset);
16236 // Don't create a indexed load / store with zero offset.
16237 if (isNullConstant(Offset))
16240 // Try turning it into a pre-indexed load / store except when:
16241 // 1) The new base ptr is a frame index.
16242 // 2) If N is a store and the new base ptr is either the same as or is a
16243 // predecessor of the value being stored.
16244 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
16245 // that would create a cycle.
16246 // 4) All uses are load / store ops that use it as old base ptr.
16248 // Check #1. Preinc'ing a frame index would require copying the stack pointer
16249 // (plus the implicit offset) to a register to preinc anyway.
16250 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16255 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
16256 : cast<StoreSDNode>(N)->getValue();
16258 // Would require a copy.
16259 if (Val == BasePtr)
16262 // Would create a cycle.
16263 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
16267 // Caches for hasPredecessorHelper.
16268 SmallPtrSet<const SDNode *, 32> Visited;
16269 SmallVector<const SDNode *, 16> Worklist;
16270 Worklist.push_back(N);
16272 // If the offset is a constant, there may be other adds of constants that
16273 // can be folded with this one. We should do this to avoid having to keep
16274 // a copy of the original base pointer.
16275 SmallVector<SDNode *, 16> OtherUses;
16276 if (isa<ConstantSDNode>(Offset))
16277 for (SDNode::use_iterator UI = BasePtr->use_begin(),
16278 UE = BasePtr->use_end();
16280 SDUse &Use = UI.getUse();
16281 // Skip the use that is Ptr and uses of other results from BasePtr's
16282 // node (important for nodes that return multiple results).
16283 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
16286 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
16289 if (Use.getUser()->getOpcode() != ISD::ADD &&
16290 Use.getUser()->getOpcode() != ISD::SUB) {
16295 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
16296 if (!isa<ConstantSDNode>(Op1)) {
16301 // FIXME: In some cases, we can be smarter about this.
16302 if (Op1.getValueType() != Offset.getValueType()) {
16307 OtherUses.push_back(Use.getUser());
16311 std::swap(BasePtr, Offset);
16313 // Now check for #3 and #4.
16314 bool RealUse = false;
16316 for (SDNode *Use : Ptr->uses()) {
16319 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
16322 // If Ptr may be folded in addressing mode of other use, then it's
16323 // not profitable to do this transformation.
16324 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
16334 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16337 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16340 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16343 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
16348 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
16349 Result.dump(&DAG); dbgs() << '\n');
16350 WorklistRemover DeadNodes(*this);
16352 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16353 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16355 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16358 // Finally, since the node is now dead, remove it from the graph.
16359 deleteAndRecombine(N);
16362 std::swap(BasePtr, Offset);
16364 // Replace other uses of BasePtr that can be updated to use Ptr
16365 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
16366 unsigned OffsetIdx = 1;
16367 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
16369 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
16370 BasePtr.getNode() && "Expected BasePtr operand");
16372 // We need to replace ptr0 in the following expression:
16373 // x0 * offset0 + y0 * ptr0 = t0
16375 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
16377 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
16378 // indexed load/store and the expression that needs to be re-written.
16380 // Therefore, we have:
16381 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
16383 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
16384 const APInt &Offset0 = CN->getAPIntValue();
16385 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
16386 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
16387 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
16388 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
16389 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
16391 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
16393 APInt CNV = Offset0;
16394 if (X0 < 0) CNV = -CNV;
16395 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
16396 else CNV = CNV - Offset1;
16398 SDLoc DL(OtherUses[i]);
16400 // We can now generate the new expression.
16401 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
16402 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
16404 SDValue NewUse = DAG.getNode(Opcode,
16406 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
16407 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
16408 deleteAndRecombine(OtherUses[i]);
16411 // Replace the uses of Ptr with uses of the updated base value.
16412 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
16413 deleteAndRecombine(Ptr.getNode());
16414 AddToWorklist(Result.getNode());
// Decide whether the pointer-update node \p PtrUse (an ADD/SUB user of
// \p Ptr) can be folded into the memory op \p N as a post-indexed
// load/store. On success the target hook fills in \p BasePtr, \p Offset
// and \p AM. Returns false when the fold would be illegal or unprofitable.
// NOTE(review): the extraction dropped several lines here (early returns,
// continues and closing braces); the surviving code is kept verbatim.
16419 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
16420 SDValue &BasePtr, SDValue &Offset,
16421 ISD::MemIndexedMode &AM,
16423 const TargetLowering &TLI) {
// Only plain ADD/SUB pointer updates are candidates for post-indexing.
16425 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
// Ask the target to split PtrUse into base + offset for a post-indexed form.
16428 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
16431 // Don't create an indexed load / store with zero offset.
16432 if (isNullConstant(Offset))
// Frame indices and physical registers cannot be replaced by the pointer
// result of an indexed memory operation.
16435 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16438 SmallPtrSet<const SDNode *, 32> Visited;
// Inspect the other users of the base pointer for reasons to bail out.
16439 for (SDNode *Use : BasePtr->uses()) {
16440 if (Use == Ptr.getNode())
16443 // No if there's a later user which could perform the index instead.
16444 if (isa<MemSDNode>(Use)) {
16445 bool IsLoad = true;
16446 bool IsMasked = false;
// If this other memory op could itself become post-indexed and N is
// reachable from it, prefer letting that later op perform the increment.
16448 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16449 IsMasked, OtherPtr, TLI)) {
16450 SmallVector<const SDNode *, 2> Worklist;
16451 Worklist.push_back(Use);
16452 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
16457 // If all the uses are load / store addresses, then don't do the
16459 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
16460 for (SDNode *UseUse : Use->uses())
// The update could be folded into this user's addressing mode instead,
// making the post-indexed transform unprofitable.
16461 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
// Find an ADD/SUB user of \p N's pointer operand that can be folded into N
// as a post-indexed access. Returns that user node (with BasePtr/Offset/AM
// filled in by shouldCombineToPostInc), or presumably null on failure —
// NOTE(review): the failure-path lines were dropped by the extraction.
16468 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
16469 bool &IsMasked, SDValue &Ptr,
16470 SDValue &BasePtr, SDValue &Offset,
16471 ISD::MemIndexedMode &AM,
16473 const TargetLowering &TLI) {
// N must itself be a (masked) load/store for which POST_INC/POST_DEC is
// a legal indexed mode.
16474 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16475 IsMasked, Ptr, TLI) ||
16479 // Try turning it into a post-indexed load / store except when
16480 // 1) All uses are load / store ops that use it as base ptr (and
16481 // it may be folded as addressing mode).
16482 // 2) Op must be independent of N, i.e. Op is neither a predecessor
16483 // nor a successor of N. Otherwise, if Op is folded that would
16485 for (SDNode *Op : Ptr->uses()) {
16487 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
// Check independence: neither N nor Op may be a predecessor of the other,
// otherwise folding Op into N would create a cycle in the DAG.
16491 SmallPtrSet<const SDNode *, 32> Visited;
16492 SmallVector<const SDNode *, 8> Worklist;
16493 // Ptr is predecessor to both N and Op.
16494 Visited.insert(Ptr.getNode());
16495 Worklist.push_back(N);
16496 Worklist.push_back(Op);
16497 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
16498 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
16504 /// Try to combine a load/store with a add/sub of the base pointer node into a
16505 /// post-indexed load/store. The transformation folded the add/subtract into the
16506 /// new indexed load/store effectively and all of its uses are redirected to the
16507 /// new load/store.
16508 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Post-indexed forms are only created after full DAG legalization.
16509 if (Level < AfterLegalizeDAG)
16512 bool IsLoad = true;
16513 bool IsMasked = false;
16517 ISD::MemIndexedMode AM = ISD::UNINDEXED;
// Locate a foldable pointer-update node; fills BasePtr/Offset/AM.
16518 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16519 Offset, AM, DAG, TLI);
// Build the replacement indexed node matching N's masked/unmasked and
// load/store flavor.
16525 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16527 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16528 BasePtr, Offset, AM);
16530 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16531 BasePtr, Offset, AM)
16532 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16533 BasePtr, Offset, AM);
16534 ++PostIndexedNodes;
16536 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
16537 Result.dump(&DAG); dbgs() << '\n');
16538 WorklistRemover DeadNodes(*this);
// For a load: value -> result 0, chain -> result 2 of the indexed node.
// For a store: chain -> result 1.
16540 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16541 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16543 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16546 // Finally, since the node is now dead, remove it from the graph.
16547 deleteAndRecombine(N);
16549 // Replace the uses of Use with uses of the updated base value.
16550 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
16551 Result.getValue(IsLoad ? 1 : 0));
16552 deleteAndRecombine(Op);
16556 /// Return the base-pointer arithmetic from an indexed \p LD.
16557 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16558 ISD::MemIndexedMode AM = LD->getAddressingMode();
16559 assert(AM != ISD::UNINDEXED);
// Operand 1 is the base pointer, operand 2 the increment of an indexed load.
16560 SDValue BP = LD->getOperand(1);
16561 SDValue Inc = LD->getOperand(2);
16563 // Some backends use TargetConstants for load offsets, but don't expect
16564 // TargetConstants in general ADD nodes. We can convert these constants into
16565 // regular Constants (if the constant is not opaque).
16566 assert((Inc.getOpcode() != ISD::TargetConstant ||
16567 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16568 "Cannot split out indexing using opaque target constants");
16569 if (Inc.getOpcode() == ISD::TargetConstant) {
16570 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16571 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16572 ConstInc->getValueType(0));
// INC modes become an ADD of base and increment, DEC modes a SUB.
16576 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16577 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
// Return the element count of \p T if it is a vector type, otherwise a
// fixed count of zero (so scalar types compare equal to each other).
16580 static inline ElementCount numVectorEltsOrZero(EVT T) {
16581 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
// Try to produce in \p Val the value of \p ST as it exists in memory, i.e.
// narrowed to ST's memory type. Returns true on success. Used when
// forwarding a stored value directly to a load.
16584 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16585 Val = ST->getValue();
16586 EVT STType = Val.getValueType();
16587 EVT STMemType = ST->getMemoryVT();
// Non-truncating store: the value is already in memory form.
16588 if (STType == STMemType)
// NOTE(review): bailing out when the memory type is legal looks inverted at
// first glance; lines around here were dropped by the extraction — confirm
// against upstream before relying on this reading.
16590 if (isTypeLegal(STMemType))
16591 return false; // fail.
// Floating-point truncating store: model the narrowing with FTRUNC if legal.
16592 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16593 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16594 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
// Integer truncating store (same element count for vectors): use TRUNCATE.
16597 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16598 STType.isInteger() && STMemType.isInteger()) {
16599 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
// Same bit width: a bitcast suffices.
16602 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16603 Val = DAG.getBitcast(STMemType, Val);
16606 return false; // fail.
// Extend \p Val (which has \p LD's memory type) to LD's result type,
// honoring LD's extension kind (any/sign/zero-extend, or bitcast for a
// non-extending load). Returns true if the extension could be modeled.
16609 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16610 EVT LDMemType = LD->getMemoryVT();
16611 EVT LDType = LD->getValueType(0);
16612 assert(Val.getValueType() == LDMemType &&
16613 "Attempting to extend value of non-matching type");
// Nothing to do when the load is not widening.
16614 if (LDType == LDMemType)
16616 if (LDMemType.isInteger() && LDType.isInteger()) {
// Map the load's extension kind to the matching extend node.
16617 switch (LD->getExtensionType()) {
16618 case ISD::NON_EXTLOAD:
16619 Val = DAG.getBitcast(LDType, Val);
16622 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16624 case ISD::SEXTLOAD:
16625 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16627 case ISD::ZEXTLOAD:
16628 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
// If \p LD loads exactly (a part of) the value just written by the store it
// is chained to, replace the load with the stored value (suitably
// truncated/extended/masked), eliminating the round trip through memory.
// Returns the replacement value or an empty SDValue.
16635 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16636 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
// The store must be the immediate chain predecessor of the load.
16638 SDValue Chain = LD->getOperand(0);
16639 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16640 // TODO: Relax this restriction for unordered atomics (see D66309)
16641 if (!ST || !ST->isSimple())
16644 EVT LDType = LD->getValueType(0);
16645 EVT LDMemType = LD->getMemoryVT();
16646 EVT STMemType = ST->getMemoryVT();
16647 EVT STType = ST->getValue().getValueType();
16649 // There are two cases to consider here:
16650 //  1. The store is fixed width and the load is scalable. In this case we
16651 //     don't know at compile time if the store completely envelops the load
16652 //     so we abandon the optimisation.
16653 //  2. The store is scalable and the load is fixed width. We could
16654 //     potentially support a limited number of cases here, but there has been
16655 //     no cost-benefit analysis to prove it's worth it.
16656 bool LdStScalable = LDMemType.isScalableVector();
16657 if (LdStScalable != STMemType.isScalableVector())
16660 // If we are dealing with scalable vectors on a big endian platform the
16661 // calculation of offsets below becomes trickier, since we do not know at
16662 // compile time the absolute size of the vector. Until we've done more
16663 // analysis on big-endian platforms it seems better to bail out for now.
16664 if (LdStScalable && DAG.getDataLayout().isBigEndian())
// Both accesses must provably address the same base, with a known byte
// offset between them.
16667 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16668 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16670 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16673 // Normalize for Endianness. After this Offset=0 will denote that the least
16674 // significant bit in the loaded value maps to the least significant bit in
16675 // the stored value). With Offset=n (for n > 0) the loaded value starts at the
16676 // n:th least significant byte of the stored value.
16677 if (DAG.getDataLayout().isBigEndian())
16678 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16679 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16683 // Check that the stored value cover all bits that are loaded.
16686 TypeSize LdMemSize = LDMemType.getSizeInBits();
16687 TypeSize StMemSize = STMemType.getSizeInBits();
// Scalable sizes can only be compared for exact equality at offset zero.
16689 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16691 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16692 StMemSize.getFixedSize());
// Helper that performs the replacement, splitting off the index arithmetic
// first when LD is an indexed load.
16694 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16695 if (LD->isIndexed()) {
16696 // Cannot handle opaque target constants and we must respect the user's
16697 // request not to split indexes from loads.
16698 if (!canSplitIdx(LD))
16700 SDValue Idx = SplitIndexingFromLoad(LD);
16701 SDValue Ops[] = {Val, Idx, Chain};
16702 return CombineTo(LD, Ops, 3);
16704 return CombineTo(LD, Val, Chain);
16710 // Memory as copy space (potentially masked).
16711 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16712 // Simple case: Direct non-truncating forwarding
16713 if (LDType.getSizeInBits() == LdMemSize)
16714 return ReplaceLd(LD, ST->getValue(), Chain);
16715 // Can we model the truncate and extension with an and mask?
16716 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16717 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16718 // Mask to size of LDMemType
16720 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16721 StMemSize.getFixedSize()),
16722 SDLoc(ST), STType);
16723 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16724 return ReplaceLd(LD, Val, Chain);
16728 // TODO: Deal with nonzero offset.
16729 if (LD->getBasePtr().isUndef() || Offset != 0)
16731 // Model necessary truncations / extenstions.
16733 // Truncate Value To Stored Memory Size.
16735 if (!getTruncatedStoreValue(ST, Val))
16737 if (!isTypeLegal(LDMemType))
16739 if (STMemType != LDMemType) {
16740 // TODO: Support vectors? This requires extract_subvector/bitcast.
16741 if (!STMemType.isVector() && !LDMemType.isVector() &&
16742 STMemType.isInteger() && LDMemType.isInteger())
16743 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
// Finally widen the memory-typed value to the load's result type, honoring
// the load's extension kind.
16747 if (!extendLoadedValueToExtension(LD, Val))
16749 return ReplaceLd(LD, Val, Chain);
16752 // On failure, cleanup dead nodes we may have created.
16753 if (Val->use_empty())
16754 deleteAndRecombine(Val.getNode());
// Main combine entry point for LOAD nodes: deletes dead loads, forwards
// stored values, refines alignment, finds better chains, and tries
// pre/post-indexed and load-slicing transforms.
16758 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16759 LoadSDNode *LD = cast<LoadSDNode>(N);
16760 SDValue Chain = LD->getChain();
16761 SDValue Ptr = LD->getBasePtr();
16763 // If load is not volatile and there are no uses of the loaded value (and
16764 // the updated indexed value in case of indexed loads), change uses of the
16765 // chain value into uses of the chain input (i.e. delete the dead load).
16766 // TODO: Allow this for unordered atomics (see D66309)
16767 if (LD->isSimple()) {
16768 if (N->getValueType(1) == MVT::Other) {
16769 // Unindexed loads.
16770 if (!N->hasAnyUseOfValue(0)) {
16771 // It's not safe to use the two value CombineTo variant here. e.g.
16772 // v1, chain2 = load chain1, loc
16773 // v2, chain3 = load chain2, loc
16775 // Now we replace use of chain2 with chain1. This makes the second load
16776 // isomorphic to the one we are deleting, and thus makes this load live.
16777 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16778 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
16780 WorklistRemover DeadNodes(*this);
16781 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16782 AddUsersToWorklist(Chain.getNode());
16783 if (N->use_empty())
16784 deleteAndRecombine(N);
16786 return SDValue(N, 0); // Return N so it doesn't get rechecked!
// Indexed loads: results are (value, new pointer, chain).
16790 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16792 // If this load has an opaque TargetConstant offset, then we cannot split
16793 // the indexing into an add/sub directly (that TargetConstant may not be
16794 // valid for a different type of node, and we cannot convert an opaque
16795 // target constant into a regular constant).
16796 bool CanSplitIdx = canSplitIdx(LD);
// Dead indexed load: the loaded value is unused, and the pointer result is
// either unused or can be rematerialized as explicit add/sub arithmetic.
16798 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16799 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16801 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16802 Index = SplitIndexingFromLoad(LD);
16803 // Try to fold the base pointer arithmetic into subsequent loads and
16805 AddUsersToWorklist(N);
16807 Index = DAG.getUNDEF(N->getValueType(1));
16808 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16809 dbgs() << "\nWith: "; Undef.dump(&DAG);
16810 dbgs() << " and 2 other values\n");
16811 WorklistRemover DeadNodes(*this);
16812 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16813 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16814 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16815 deleteAndRecombine(N);
16816 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16821 // If this load is directly stored, replace the load value with the stored
16823 if (auto V = ForwardStoreValueToDirectLoad(LD))
16826 // Try to infer better alignment information than the load already has.
16827 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16828 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16829 if (*Alignment > LD->getAlign() &&
16830 isAligned(*Alignment, LD->getSrcValueOffset())) {
16831 SDValue NewLoad = DAG.getExtLoad(
16832 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16833 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16834 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16835 // NewLoad will always be N as we are only refining the alignment
16836 assert(NewLoad.getNode() == N);
16842 if (LD->isUnindexed()) {
16843 // Walk up chain skipping non-aliasing memory nodes.
16844 SDValue BetterChain = FindBetterChain(LD, Chain);
16846 // If there is a better chain.
16847 if (Chain != BetterChain) {
16850 // Replace the chain to void dependency.
16851 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16852 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16853 BetterChain, Ptr, LD->getMemOperand());
16855 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16856 LD->getValueType(0),
16857 BetterChain, Ptr, LD->getMemoryVT(),
16858 LD->getMemOperand());
16861 // Create token factor to keep old chain connected.
16862 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16863 MVT::Other, Chain, ReplLoad.getValue(1));
16865 // Replace uses with load result and token factor
16866 return CombineTo(N, ReplLoad.getValue(0), Token);
16870 // Try transforming N to an indexed load.
16871 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16872 return SDValue(N, 0);
16874 // Try to slice up N to more direct loads if the slices are mapped to
16875 // different register banks or pairing can take place.
16876 if (SliceUpLoad(N))
16877 return SDValue(N, 0);
16884 /// Helper structure used to slice a load in smaller loads.
16885 /// Basically a slice is obtained from the following sequence:
16886 /// Origin = load Ty1, Base
16887 /// Shift = srl Ty1 Origin, CstTy Amount
16888 /// Inst = trunc Shift to Ty2
16890 /// Then, it will be rewritten into:
16891 /// Slice = load SliceTy, Base + SliceOffset
16892 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16894 /// SliceTy is deduced from the number of bits that are actually used to
16896 struct LoadedSlice {
16897 /// Helper structure used to compute the cost of a slice.
16899 /// Are we optimizing for code size.
16900 bool ForCodeSize = false;
// Counters for the operations a slicing configuration would execute.
16903 unsigned Loads = 0;
16904 unsigned Truncates = 0;
16905 unsigned CrossRegisterBanksCopies = 0;
16906 unsigned ZExts = 0;
16907 unsigned Shift = 0;
16909 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16911 /// Get the cost of one isolated slice.
16912 Cost(const LoadedSlice &LS, bool ForCodeSize)
16913 : ForCodeSize(ForCodeSize), Loads(1) {
16914 EVT TruncType = LS.Inst->getValueType(0);
16915 EVT LoadedType = LS.getLoadedType();
// A slice whose loaded type differs from the truncated type needs a zext
// unless the target considers that extension free.
16916 if (TruncType != LoadedType &&
16917 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16921 /// Account for slicing gain in the current cost.
16922 /// Slicing provide a few gains like removing a shift or a
16923 /// truncate. This method allows to grow the cost of the original
16924 /// load with the gain from this slice.
16925 void addSliceGain(const LoadedSlice &LS) {
16926 // Each slice saves a truncate.
16927 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16928 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16929 LS.Inst->getValueType(0)))
16931 // If there is a shift amount, this slice gets rid of it.
16934 // If this slice can merge a cross register bank copy, account for it.
16935 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16936 ++CrossRegisterBanksCopies;
// Component-wise accumulation of slice costs.
16939 Cost &operator+=(const Cost &RHS) {
16940 Loads += RHS.Loads;
16941 Truncates += RHS.Truncates;
16942 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16943 ZExts += RHS.ZExts;
16944 Shift += RHS.Shift;
16948 bool operator==(const Cost &RHS) const {
16949 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16950 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16951 ZExts == RHS.ZExts && Shift == RHS.Shift;
16954 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16956 bool operator<(const Cost &RHS) const {
16957 // Assume cross register banks copies are as expensive as loads.
16958 // FIXME: Do we want some more target hooks?
16959 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16960 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16961 // Unless we are optimizing for code size, consider the
16962 // expensive operation first.
16963 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16964 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16965 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16966 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16969 bool operator>(const Cost &RHS) const { return RHS < *this; }
16971 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16973 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16976 // The last instruction that represent the slice. This should be a
16977 // truncate instruction.
16980 // The original load instruction.
16981 LoadSDNode *Origin;
16983 // The right shift amount in bits from the original load.
16986 // The DAG from which Origin came from.
16987 // This is used to get some contextual information about legal types, etc.
16990 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16991 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16992 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16994 /// Get the bits used in a chunk of bits \p BitWidth large.
16995 /// \return Result is \p BitWidth and has used bits set to 1 and
16996 /// not used bits set to 0.
16997 APInt getUsedBits() const {
16998 // Reproduce the trunc(lshr) sequence:
16999 // - Start from the truncated value.
17000 // - Zero extend to the desired bit width.
17002 assert(Origin && "No original load to compare against.");
17003 unsigned BitWidth = Origin->getValueSizeInBits(0);
17004 assert(Inst && "This slice is not bound to an instruction");
17005 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
17006 "Extracted slice is bigger than the whole type!");
17007 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
17008 UsedBits.setAllBits();
17009 UsedBits = UsedBits.zext(BitWidth);
// Shift the all-ones mask into the position the slice occupies in the
// original loaded value.
17010 UsedBits <<= Shift;
17014 /// Get the size of the slice to be loaded in bytes.
17015 unsigned getLoadedSize() const {
17016 unsigned SliceSize = getUsedBits().countPopulation();
17017 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
17018 return SliceSize / 8;
17021 /// Get the type that will be loaded for this slice.
17022 /// Note: This may not be the final type for the slice.
17023 EVT getLoadedType() const {
17024 assert(DAG && "Missing context");
17025 LLVMContext &Ctxt = *DAG->getContext();
17026 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
17029 /// Get the alignment of the load used for this slice.
17030 Align getAlign() const {
17031 Align Alignment = Origin->getAlign();
17032 uint64_t Offset = getOffsetFromBase();
// Derive the alignment of base + Offset from the original alignment.
17034 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
17038 /// Check if this slice can be rewritten with legal operations.
17039 bool isLegal() const {
17040 // An invalid slice is not legal.
17041 if (!Origin || !Inst || !DAG)
17044 // Offsets are for indexed load only, we do not handle that.
17045 if (!Origin->getOffset().isUndef())
17048 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
17050 // Check that the type is legal.
17051 EVT SliceType = getLoadedType();
17052 if (!TLI.isTypeLegal(SliceType))
17055 // Check that the load is legal for this type.
17056 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
17059 // Check that the offset can be computed.
17060 // 1. Check its type.
17061 EVT PtrType = Origin->getBasePtr().getValueType();
17062 if (PtrType == MVT::Untyped || PtrType.isExtended())
17065 // 2. Check that it fits in the immediate.
17066 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
17069 // 3. Check that the computation is legal.
17070 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
17073 // Check that the zext is legal if it needs one.
17074 EVT TruncateType = Inst->getValueType(0);
17075 if (TruncateType != SliceType &&
17076 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
17082 /// Get the offset in bytes of this slice in the original chunk of
17084 /// \pre DAG != nullptr.
17085 uint64_t getOffsetFromBase() const {
17086 assert(DAG && "Missing context.");
17087 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
17088 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
17089 uint64_t Offset = Shift / 8;
17090 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
17091 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
17092 "The size of the original loaded type is not a multiple of a"
17094 // If Offset is bigger than TySizeInBytes, it means we are loading all
17095 // zeros. This should have been optimized before in the process.
17096 assert(TySizeInBytes > Offset &&
17097 "Invalid shift amount for given loaded size");
// On big-endian targets the byte offset counts from the most significant
// end, so mirror it.
17099 Offset = TySizeInBytes - Offset - getLoadedSize();
17103 /// Generate the sequence of instructions to load the slice
17104 /// represented by this object and redirect the uses of this slice to
17105 /// this new sequence of instructions.
17106 /// \pre this->Inst && this->Origin are valid Instructions and this
17107 /// object passed the legal check: LoadedSlice::isLegal returned true.
17108 /// \return The last instruction of the sequence used to load the slice.
17109 SDValue loadSlice() const {
17110 assert(Inst && Origin && "Unable to replace a non-existing slice.");
17111 const SDValue &OldBaseAddr = Origin->getBasePtr();
17112 SDValue BaseAddr = OldBaseAddr;
17113 // Get the offset in that chunk of bytes w.r.t. the endianness.
17114 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
17115 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
17117 // BaseAddr = BaseAddr + Offset.
17118 EVT ArithType = BaseAddr.getValueType();
17120 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
17121 DAG->getConstant(Offset, DL, ArithType));
17124 // Create the type of the loaded slice according to its size.
17125 EVT SliceType = getLoadedType();
17127 // Create the load for the slice.
17129 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
17130 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
17131 Origin->getMemOperand()->getFlags());
17132 // If the final type is not the same as the loaded type, this means that
17133 // we have to pad with zero. Create a zero extend for that.
17134 EVT FinalType = Inst->getValueType(0);
17135 if (SliceType != FinalType)
17137 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
17141 /// Check if this slice can be merged with an expensive cross register
17142 /// bank copy. E.g.,
17144 /// f = bitcast i32 i to float
17145 bool canMergeExpensiveCrossRegisterBankCopy() const {
17146 if (!Inst || !Inst->hasOneUse())
17148 SDNode *Use = *Inst->use_begin();
17149 if (Use->getOpcode() != ISD::BITCAST)
17151 assert(DAG && "Missing context");
17152 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
17153 EVT ResVT = Use->getValueType(0);
17154 const TargetRegisterClass *ResRC =
17155 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
17156 const TargetRegisterClass *ArgRC =
17157 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
17158 Use->getOperand(0)->isDivergent());
17159 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
17162 // At this point, we know that we perform a cross-register-bank copy.
17163 // Check if it is expensive.
17164 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
17165 // Assume bitcasts are cheap, unless both register classes do not
17166 // explicitly share a common sub class.
17167 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
17170 // Check if it will be merged with the load.
17171 // 1. Check the alignment / fast memory access constraint.
17172 bool IsFast = false;
17173 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
17174 Origin->getAddressSpace(), getAlign(),
17175 Origin->getMemOperand()->getFlags(), &IsFast) ||
17179 // 2. Check that the load is a legal operation for that type.
17180 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
17183 // 3. Check that we do not have a zext in the way.
17184 if (Inst->getValueType(0) != getLoadedType())
17191 } // end anonymous namespace
17193 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
17194 /// \p UsedBits looks like 0..0 1..1 0..0.
17195 static bool areUsedBitsDense(const APInt &UsedBits) {
17196 // If all the bits are one, this is dense!
17197 if (UsedBits.isAllOnes())
17200 // Get rid of the unused bits on the right.
17201 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
17202 // Get rid of the unused bits on the left.
17203 if (NarrowedUsedBits.countLeadingZeros())
17204 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
17205 // Check that the chunk of bits is completely used.
17206 return NarrowedUsedBits.isAllOnes();
17209 /// Check whether or not \p First and \p Second are next to each other
17210 /// in memory. This means that there is no hole between the bits loaded
17211 /// by \p First and the bits loaded by \p Second.
17212 static bool areSlicesNextToEachOther(const LoadedSlice &First,
17213 const LoadedSlice &Second) {
17214 assert(First.Origin == Second.Origin && First.Origin &&
17215 "Unable to match different memory origins.");
17216 APInt UsedBits = First.getUsedBits();
17217 assert((UsedBits & Second.getUsedBits()) == 0 &&
17218 "Slices are not supposed to overlap.");
// Adjacency holds iff the union of the two disjoint bit masks is dense.
17219 UsedBits |= Second.getUsedBits();
17220 return areUsedBitsDense(UsedBits);
17223 /// Adjust the \p GlobalLSCost according to the target
17224 /// pairing capabilities and the layout of the slices.
17225 /// \pre \p GlobalLSCost should account for at least as many loads as
17226 /// there is in the slices in \p LoadedSlices.
17227 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17228 LoadedSlice::Cost &GlobalLSCost) {
17229 unsigned NumberOfSlices = LoadedSlices.size();
17230 // If there is less than 2 elements, no pairing is possible.
17231 if (NumberOfSlices < 2)
17234 // Sort the slices so that elements that are likely to be next to each
17235 // other in memory are next to each other in the list.
17236 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
17237 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
17238 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
17240 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
17241 // First (resp. Second) is the first (resp. Second) potentially candidate
17242 // to be placed in a paired load.
17243 const LoadedSlice *First = nullptr;
17244 const LoadedSlice *Second = nullptr;
17245 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
17246 // Set the beginning of the pair.
// NOTE(review): the loop-increment clause and several continue paths were
// dropped by the extraction; the pairing loop below is kept verbatim.
17248 Second = &LoadedSlices[CurrSlice];
17250 // If First is NULL, it means we start a new pair.
17251 // Get to the next slice.
17255 EVT LoadedType = First->getLoadedType();
17257 // If the types of the slices are different, we cannot pair them.
17258 if (LoadedType != Second->getLoadedType())
17261 // Check if the target supplies paired loads for this type.
17262 Align RequiredAlignment;
17263 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
17264 // move to the next pair, this type is hopeless.
17268 // Check if we meet the alignment requirement.
17269 if (First->getAlign() < RequiredAlignment)
17272 // Check that both loads are next to each other in memory.
17273 if (!areSlicesNextToEachOther(*First, *Second))
// A pair was formed: the two slices will be emitted as one paired load,
// so the global cost loses one load.
17276 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
17277 --GlobalLSCost.Loads;
17278 // Move to the next pair.
17283 /// Check the profitability of all involved LoadedSlice.
17284 /// Currently, it is considered profitable if there are exactly two
17285 /// involved slices (1) which are (2) next to each other in memory, and
17286 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
17288 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
17289 /// the elements themselves.
17291 /// FIXME: When the cost model will be mature enough, we can relax
17292 /// constraints (1) and (2).
17293 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17294 const APInt &UsedBits, bool ForCodeSize) {
17295 unsigned NumberOfSlices = LoadedSlices.size();
// In stress mode, slice as soon as there is more than one slice, bypassing
// the cost model entirely.
17296 if (StressLoadSlicing)
17297 return NumberOfSlices > 1;
// Constraint (1): exactly two slices.
17300 if (NumberOfSlices != 2)
// Constraint (2): the used bits must be dense.
17304 if (!areUsedBitsDense(UsedBits))
17308 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
17309 // The original code has one big load.
17310 OrigCost.Loads = 1;
17311 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
17312 const LoadedSlice &LS = LoadedSlices[CurrSlice];
17313 // Accumulate the cost of all the slices.
17314 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
17315 GlobalSlicingCost += SliceCost;
17317 // Account as cost in the original configuration the gain obtained
17318 // with the current slices.
17319 OrigCost.addSliceGain(LS);
17322 // If the target supports paired load, adjust the cost accordingly.
17323 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
// Constraint (3): slicing must be strictly cheaper than the original load.
17324 return OrigCost > GlobalSlicingCost;
17327 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
17328 /// operations, split it in the various pieces being extracted.
17330 /// This sort of thing is introduced by SROA.
17331 /// This slicing takes care not to insert overlapping loads.
17332 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
17333 bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Only attempt slicing once the DAG has been fully legalized.
17334 if (Level < AfterLegalizeDAG)
17337 LoadSDNode *LD = cast<LoadSDNode>(N);
17338 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
17339 !LD->getValueType(0).isInteger())
17342 // The algorithm to split up a load of a scalable vector into individual
17343 // elements currently requires knowing the length of the loaded type,
17344 // so will need adjusting to work on scalable vectors.
17345 if (LD->getValueType(0).isScalableVector())
17348 // Keep track of already used bits to detect overlapping values.
17349 // In that case, we will just abort the transformation.
17350 APInt UsedBits(LD->getValueSizeInBits(0), 0);
17352 SmallVector<LoadedSlice, 4> LoadedSlices;
17354 // Check if this load is used as several smaller chunks of bits.
17355 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
17356 // of computation for each trunc.
17357 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
17358 UI != UIEnd; ++UI) {
17359 // Skip the uses of the chain.
17360 if (UI.getUse().getResNo() != 0)
17363 SDNode *User = *UI;
17364 unsigned Shift = 0;
17366 // Check if this is a trunc(lshr).
17367 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
17368 isa<ConstantSDNode>(User->getOperand(1))) {
17369 Shift = User->getConstantOperandVal(1);
// Look through the shift to its single user, which must be the truncate.
17370 User = *User->use_begin();
17373 // At this point, User is a TRUNCATE iff we encountered trunc or
17375 if (User->getOpcode() != ISD::TRUNCATE)
17378 // The width of the type must be a power of 2 and greater than 8-bits.
17379 // Otherwise the load cannot be represented in LLVM IR.
17380 // Moreover, if we shifted with a non-8-bits multiple, the slice
17381 // will be across several bytes. We do not support that.
17382 unsigned Width = User->getValueSizeInBits(0);
17383 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
17386 // Build the slice for this chain of computations.
17387 LoadedSlice LS(User, LD, Shift, &DAG);
17388 APInt CurrentUsedBits = LS.getUsedBits();
17390 // Check if this slice overlaps with another.
17391 if ((CurrentUsedBits & UsedBits) != 0)
17393 // Update the bits used globally.
17394 UsedBits |= CurrentUsedBits;
17396 // Check if the new slice would be legal.
17400 // Record the slice.
17401 LoadedSlices.push_back(LS);
17404 // Abort slicing if it does not seem to be profitable.
17405 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
17410 // Rewrite each chain to use an independent load.
17411 // By construction, each chain can be represented by a unique load.
17413 // Prepare the argument for the new token factor for all the slices.
17414 SmallVector<SDValue, 8> ArgChains;
17415 for (const LoadedSlice &LS : LoadedSlices) {
17416 SDValue SliceInst = LS.loadSlice();
17417 CombineTo(LS.Inst, SliceInst, true);
// loadSlice() may wrap the load in an extension; peel it to reach the load.
17418 if (SliceInst.getOpcode() != ISD::LOAD)
17419 SliceInst = SliceInst.getOperand(0);
17420 assert(SliceInst->getOpcode() == ISD::LOAD &&
17421 "It takes more than a zext to get to the loaded slice!!");
17422 ArgChains.push_back(SliceInst.getValue(1));
// Tie all slice chains together and redirect users of the original load's
// chain result to the new token factor.
17425 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
17427 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
17428 AddToWorklist(Chain.getNode());
17432 /// Check to see if V is (and load (ptr), imm), where the load is having
17433 /// specific bytes cleared out. If so, return the byte size being masked out
17434 /// and the shift amount.
// Returns (0, 0) on failure; callers test Result.first for success.
17435 static std::pair<unsigned, unsigned>
17436 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
17437 std::pair<unsigned, unsigned> Result(0, 0);
17439 // Check for the structure we're looking for.
17440 if (V->getOpcode() != ISD::AND ||
17441 !isa<ConstantSDNode>(V->getOperand(1)) ||
17442 !ISD::isNormalLoad(V->getOperand(0).getNode()))
17445 // Check the chain and pointer.
17446 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
17447 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
17449 // This only handles simple types.
17450 if (V.getValueType() != MVT::i16 &&
17451 V.getValueType() != MVT::i32 &&
17452 V.getValueType() != MVT::i64)
17455 // Check the constant mask. Invert it so that the bits being masked out are
17456 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
17457 // follow the sign bit for uniformity.
17458 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
17459 unsigned NotMaskLZ = countLeadingZeros(NotMask);
17460 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
17461 unsigned NotMaskTZ = countTrailingZeros(NotMask);
17462 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
17463 if (NotMaskLZ == 64) return Result; // All zero mask.
17465 // See if we have a continuous run of bits. If so, we have 0*1+0*
17466 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
17469 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
17470 if (V.getValueType() != MVT::i64 && NotMaskLZ)
17471 NotMaskLZ -= 64-V.getValueSizeInBits();
17473 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
17474 switch (MaskedBytes) {
17478 default: return Result; // All one mask, or 5-byte mask.
17481 // Verify that the first bit starts at a multiple of mask so that the access
17482 // is aligned the same as the access width.
17483 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
17485 // For narrowing to be valid, it must be the case that the load is the
17486 // immediately preceding memory operation before the store.
17487 if (LD == Chain.getNode())
17489 else if (Chain->getOpcode() == ISD::TokenFactor &&
17490 SDValue(LD, 1).hasOneUse()) {
17491 // LD has only 1 chain use so there are no indirect dependencies.
17492 if (!LD->isOperandOf(Chain.getNode()))
17495 return Result; // Fail.
17497 Result.first = MaskedBytes;
17498 Result.second = NotMaskTZ/8;
17502 /// Check to see if IVal is something that provides a value as specified by
17503 /// MaskInfo. If so, replace the specified store with a narrower store of
17504 /// truncated IVal.
// MaskInfo is (number of masked bytes, byte shift), as produced by
// CheckForMaskedLoad. Returns the new (narrower) store, or SDValue() on
// failure.
17506 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17507 SDValue IVal, StoreSDNode *St,
17509 unsigned NumBytes = MaskInfo.first;
17510 unsigned ByteShift = MaskInfo.second;
17511 SelectionDAG &DAG = DC->getDAG();
17513 // Check to see if IVal is all zeros in the part being masked in by the 'or'
17514 // that uses this. If not, this is not a replacement.
17515 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17516 ByteShift*8, (ByteShift+NumBytes)*8);
17517 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17519 // Check that it is legal on the target to do this. It is legal if the new
17520 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17521 // legalization. If the source type is legal, but the store type isn't, see
17522 // if we can use a truncating store.
17523 MVT VT = MVT::getIntegerVT(NumBytes * 8);
17524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17525 bool UseTruncStore;
17526 if (DC->isTypeLegal(VT))
17527 UseTruncStore = false;
17528 else if (TLI.isTypeLegal(IVal.getValueType()) &&
17529 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
17530 UseTruncStore = true;
17533 // Check that the target doesn't think this is a bad idea.
17534 if (St->getMemOperand() &&
17535 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17536 *St->getMemOperand()))
17539 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
17540 // shifted by ByteShift and truncated down to NumBytes.
17543 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17544 DAG.getConstant(ByteShift*8, DL,
17545 DC->getShiftAmountTy(IVal.getValueType())));
17548 // Figure out the offset for the store and the alignment of the access.
// Little endian: the masked bytes sit ByteShift bytes from the start;
// big endian: they sit ByteShift bytes from the end.
17550 if (DAG.getDataLayout().isLittleEndian())
17551 StOffset = ByteShift;
17553 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17555 SDValue Ptr = St->getBasePtr();
17558 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
// Truncating-store path: store the (still wide) value with a narrower
// memory VT instead of truncating the value first.
17563 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
17564 St->getPointerInfo().getWithOffset(StOffset),
17565 VT, St->getOriginalAlign());
17567 // Truncate down to the new size.
17568 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17571 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17572 St->getPointerInfo().getWithOffset(StOffset),
17573 St->getOriginalAlign());
17576 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17577 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17578 /// narrowing the load and store if it would end up being a win for performance
17580 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17581 StoreSDNode *ST = cast<StoreSDNode>(N);
17582 if (!ST->isSimple())
17585 SDValue Chain = ST->getChain();
17586 SDValue Value = ST->getValue();
17587 SDValue Ptr = ST->getBasePtr();
17588 EVT VT = Value.getValueType();
17590 if (ST->isTruncatingStore() || VT.isVector())
17593 unsigned Opc = Value.getOpcode();
// Only a single-use OR/XOR/AND feeding the store is handled.
17595 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17596 !Value.hasOneUse())
17599 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17600 // is a byte mask indicating a consecutive number of bytes, check to see if
17601 // Y is known to provide just those bytes. If so, we try to replace the
17602 // load + replace + store sequence with a single (narrower) store, which makes
17604 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
17605 std::pair<unsigned, unsigned> MaskedLoad;
17606 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17607 if (MaskedLoad.first)
17608 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17609 Value.getOperand(1), ST,this))
17612 // Or is commutative, so try swapping X and Y.
17613 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17614 if (MaskedLoad.first)
17615 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17616 Value.getOperand(0), ST,this))
17620 if (!EnableReduceLoadOpStoreWidth)
17623 if (Value.getOperand(1).getOpcode() != ISD::Constant)
17626 SDValue N0 = Value.getOperand(0);
// The load must feed only this op and be chained directly to the store.
17627 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17628 Chain == SDValue(N0.getNode(), 1)) {
17629 LoadSDNode *LD = cast<LoadSDNode>(N0);
17630 if (LD->getBasePtr() != Ptr ||
17631 LD->getPointerInfo().getAddrSpace() !=
17632 ST->getPointerInfo().getAddrSpace())
17635 // Find the type to narrow the load / op / store to.
17636 SDValue N1 = Value.getOperand(1);
17637 unsigned BitWidth = N1.getValueSizeInBits();
17638 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
// For AND, invert the mask so the bits actually changed are the set bits,
// matching the OR/XOR convention.
17639 if (Opc == ISD::AND)
17640 Imm ^= APInt::getAllOnes(BitWidth);
17641 if (Imm == 0 || Imm.isAllOnes())
17643 unsigned ShAmt = Imm.countTrailingZeros();
17644 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17645 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17646 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17647 // The narrowing should be profitable, the load/store operation should be
17648 // legal (or custom) and the store size should be equal to the NewVT width.
17649 while (NewBW < BitWidth &&
17650 (NewVT.getStoreSizeInBits() != NewBW ||
17651 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17652 !TLI.isNarrowingProfitable(VT, NewVT))) {
17653 NewBW = NextPowerOf2(NewBW);
17654 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17656 if (NewBW >= BitWidth)
17659 // If the lsb that changed does not start at a NewBW-bit type boundary,
17660 // start at the previous boundary.
17662 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
// All changed bits must fit inside the chosen NewBW window at ShAmt.
17663 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17664 std::min(BitWidth, ShAmt + NewBW));
17665 if ((Imm & Mask) == Imm) {
17666 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17667 if (Opc == ISD::AND)
17668 NewImm ^= APInt::getAllOnes(NewBW);
17669 uint64_t PtrOff = ShAmt / 8;
17670 // For big endian targets, we need to adjust the offset to the pointer to
17671 // load the correct bytes.
17672 if (DAG.getDataLayout().isBigEndian())
17673 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17675 bool IsFast = false;
17676 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17677 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17678 LD->getAddressSpace(), NewAlign,
17679 LD->getMemOperand()->getFlags(), &IsFast) ||
// Build the narrowed load / op / store sequence at the byte offset.
17684 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17686 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17687 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17688 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17689 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17690 DAG.getConstant(NewImm, SDLoc(Value),
17693 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17694 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17696 AddToWorklist(NewPtr.getNode());
17697 AddToWorklist(NewLD.getNode());
17698 AddToWorklist(NewVal.getNode());
17699 WorklistRemover DeadNodes(*this);
17700 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17709 /// For a given floating point load / store pair, if the load value isn't used
17710 /// by any other operations, then consider transforming the pair to integer
17711 /// load / store operations if the target deems the transformation profitable.
17712 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17713 StoreSDNode *ST = cast<StoreSDNode>(N);
17714 SDValue Value = ST->getValue();
17715 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17716 Value.hasOneUse()) {
17717 LoadSDNode *LD = cast<LoadSDNode>(Value);
17718 EVT VT = LD->getMemoryVT();
// Preconditions: FP memory type, matching load/store types, no
// non-temporal hints, and both accesses in address space 0.
17719 if (!VT.isFloatingPoint() ||
17720 VT != ST->getMemoryVT() ||
17721 LD->isNonTemporal() ||
17722 ST->isNonTemporal() ||
17723 LD->getPointerInfo().getAddrSpace() != 0 ||
17724 ST->getPointerInfo().getAddrSpace() != 0)
17727 TypeSize VTSize = VT.getSizeInBits();
17729 // We don't know the size of scalable types at compile time so we cannot
17730 // create an integer of the equivalent size.
17731 if (VTSize.isScalable())
17734 bool FastLD = false, FastST = false;
17735 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
// Both the integer load and store must be legal, desirable per the target
// hook, and fast for the existing memory operands.
17736 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17737 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17738 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17739 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17740 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17741 *LD->getMemOperand(), &FastLD) ||
17742 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17743 *ST->getMemOperand(), &FastST) ||
17744 !FastLD || !FastST)
17748 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17749 LD->getPointerInfo(), LD->getAlign());
17752 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17753 ST->getPointerInfo(), ST->getAlign());
17755 AddToWorklist(NewLD.getNode());
17756 AddToWorklist(NewST.getNode());
17757 WorklistRemover DeadNodes(*this);
17758 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17766 // This is a helper function for visitMUL to check the profitability
17767 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17768 // MulNode is the original multiply, AddNode is (add x, c1),
17769 // and ConstNode is c2.
17771 // If the (add x, c1) has multiple uses, we could increase
17772 // the number of adds if we make this transformation.
17773 // It would only be worth doing this if we can remove a
17774 // multiply in the process. Check for that here.
17778 // We're checking for cases where we have common "c3 * A" expressions.
17779 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
17780 SDValue ConstNode) {
17783 // If the add only has one use, and the target thinks the folding is
17784 // profitable or does not lead to worse code, this would be OK to do.
17785 if (AddNode->hasOneUse() &&
17786 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17789 // Walk all the users of the constant with which we're multiplying.
17790 for (SDNode *Use : ConstNode->uses()) {
17791 if (Use == MulNode) // This use is the one we're on right now. Skip it.
17794 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17796 SDNode *MulVar = AddNode.getOperand(0).getNode();
17798 // OtherOp is what we're multiplying against the constant.
// The constant may appear as either multiply operand.
17799 if (Use->getOperand(0) == ConstNode)
17800 OtherOp = Use->getOperand(1).getNode();
17802 OtherOp = Use->getOperand(0).getNode();
17804 // Check to see if multiply is with the same operand of our "add".
17806 // ConstNode = CONST
17807 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17809 // AddNode = (A + c1) <-- MulVar is A.
17810 // = AddNode * ConstNode <-- current visiting instruction.
17812 // If we make this transformation, we will have a common
17813 // multiply (ConstNode * A) that we can save.
17814 if (OtherOp == MulVar)
17817 // Now check to see if a future expansion will give us a common
17820 // ConstNode = CONST
17821 // AddNode = (A + c1)
17822 // ... = AddNode * ConstNode <-- current visiting instruction.
17824 // OtherOp = (A + c2)
17825 // Use = OtherOp * ConstNode <-- visiting Use.
17827 // If we make this transformation, we will have a common
17828 // multiply (CONST * A) after we also do the same transformation
17829 // to the "t2" instruction.
17830 if (OtherOp->getOpcode() == ISD::ADD &&
17831 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17832 OtherOp->getOperand(0).getNode() == MulVar)
17837 // Didn't find a case where this would be profitable.
// Build a TokenFactor over the input chains of the first NumStores stores,
// skipping chains that are themselves one of the merged stores.
17841 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17842 unsigned NumStores) {
17843 SmallVector<SDValue, 8> Chains;
17844 SmallPtrSet<const SDNode *, 8> Visited;
17845 SDLoc StoreDL(StoreNodes[0].MemNode);
// Pre-mark all merged stores so their own chains are filtered out below.
17847 for (unsigned i = 0; i < NumStores; ++i) {
17848 Visited.insert(StoreNodes[i].MemNode);
17851 // Don't include nodes that are children or repeated nodes.
17852 for (unsigned i = 0; i < NumStores; ++i) {
17853 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17854 Chains.push_back(StoreNodes[i].MemNode->getChain());
17857 assert(Chains.size() > 0 && "Chain should have generated a chain");
17858 return DAG.getTokenFactor(StoreDL, Chains);
// Merge NumStores adjacent stores of constants or extracted vector elements
// into a single (possibly vector or truncating) store.
17861 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17862 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17863 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17864 // Make sure we have something to merge.
17868 assert((!UseTrunc || !UseVector) &&
17869 "This optimization cannot emit a vector truncating store");
17871 // The latest Node in the DAG.
17872 SDLoc DL(StoreNodes[0].MemNode);
17874 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17875 unsigned SizeInBits = NumStores * ElementSizeBits;
17876 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17878 Optional<MachineMemOperand::Flags> Flags;
// All merged stores must carry identical memory-operand flags; AA info is
// concatenated across them.
17880 for (unsigned I = 0; I != NumStores; ++I) {
17881 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17883 Flags = St->getMemOperand()->getFlags();
17884 AAInfo = St->getAAInfo();
17887 // Skip merging if there's an inconsistent flag.
17888 if (Flags != St->getMemOperand()->getFlags())
17890 // Concatenate AA metadata.
17891 AAInfo = AAInfo.concat(St->getAAInfo());
17896 unsigned Elts = NumStores * NumMemElts;
17897 // Get the type for the merged vector store.
17898 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17900 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17904 if (IsConstantSrc) {
17905 SmallVector<SDValue, 8> BuildVector;
17906 for (unsigned I = 0; I != NumStores; ++I) {
17907 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17908 SDValue Val = St->getValue();
17909 // If constant is of the wrong type, convert it now.
17910 if (MemVT != Val.getValueType()) {
17911 Val = peekThroughBitcasts(Val);
17912 // Deal with constants of wrong size.
17913 if (ElementSizeBits != Val.getValueSizeInBits()) {
17915 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17916 if (isa<ConstantFPSDNode>(Val)) {
17917 // Not clear how to truncate FP values.
17921 if (auto *C = dyn_cast<ConstantSDNode>(Val))
17922 Val = DAG.getConstant(C->getAPIntValue()
17923 .zextOrTrunc(Val.getValueSizeInBits())
17924 .zextOrTrunc(ElementSizeBits),
17925 SDLoc(C), IntMemVT);
17927 // Make sure the correctly sized type is used.
17928 Val = DAG.getBitcast(MemVT, Val);
17930 BuildVector.push_back(Val);
17932 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17933 : ISD::BUILD_VECTOR,
17934 DL, StoreTy, BuildVector);
17936 SmallVector<SDValue, 8> Ops;
17937 for (unsigned i = 0; i < NumStores; ++i) {
17938 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17939 SDValue Val = peekThroughBitcasts(St->getValue());
17940 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17941 // type MemVT. If the underlying value is not the correct
17942 // type, but it is an extraction of an appropriate vector we
17943 // can recast Val to be of the correct type. This may require
17944 // converting between EXTRACT_VECTOR_ELT and
17945 // EXTRACT_SUBVECTOR.
17946 if ((MemVT != Val.getValueType()) &&
17947 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17948 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17949 EVT MemVTScalarTy = MemVT.getScalarType();
17950 // We may need to add a bitcast here to get types to line up.
17951 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17952 Val = DAG.getBitcast(MemVT, Val);
17954 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17955 : ISD::EXTRACT_VECTOR_ELT;
17956 SDValue Vec = Val.getOperand(0);
17957 SDValue Idx = Val.getOperand(1);
17958 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17961 Ops.push_back(Val);
17964 // Build the extracted vector elements back into a vector.
17965 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17966 : ISD::BUILD_VECTOR,
17970 // We should always use a vector store when merging extracted vector
17971 // elements, so this path implies a store of constants.
17972 assert(IsConstantSrc && "Merged vector elements should use vector store");
17974 APInt StoreInt(SizeInBits, 0);
17976 // Construct a single integer constant which is made of the smaller
17977 // constant inputs.
17978 bool IsLE = DAG.getDataLayout().isLittleEndian();
// On little-endian targets, visit the stores in reverse so the first
// store's value ends up in the low bits of the combined constant.
17979 for (unsigned i = 0; i < NumStores; ++i) {
17980 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17981 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17983 SDValue Val = St->getValue();
17984 Val = peekThroughBitcasts(Val);
17985 StoreInt <<= ElementSizeBits;
17986 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17987 StoreInt |= C->getAPIntValue()
17988 .zextOrTrunc(ElementSizeBits)
17989 .zextOrTrunc(SizeInBits);
17990 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17991 StoreInt |= C->getValueAPF()
17993 .zextOrTrunc(ElementSizeBits)
17994 .zextOrTrunc(SizeInBits);
17995 // If fp truncation is necessary give up for now.
17996 if (MemVT.getSizeInBits() != ElementSizeBits)
17999 llvm_unreachable("Invalid constant element type")
18003 // Create the new Load and Store operations.
18004 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
18007 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18008 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
18010 // Make sure we use trunc store if it's necessary to be legal.
18013 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
18014 FirstInChain->getPointerInfo(),
18015 FirstInChain->getAlign(), *Flags, AAInfo);
18016 } else { // Must be realized as a trunc store
18017 EVT LegalizedStoredValTy =
18018 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
18019 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
18020 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
18021 SDValue ExtendedStoreVal =
18022 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
18023 LegalizedStoredValTy);
18024 NewStore = DAG.getTruncStore(
18025 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
18026 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
18027 FirstInChain->getAlign(), *Flags, AAInfo);
18030 // Replace all merged stores with the new store.
18031 for (unsigned i = 0; i < NumStores; ++i)
18032 CombineTo(StoreNodes[i].MemNode, NewStore);
18034 AddToWorklist(NewChain.getNode());
// Populate StoreNodes with stores that may be mergeable with St (same base
// pointer, compatible source kind), and set RootNode to the common chain
// ancestor the search was rooted at.
18038 void DAGCombiner::getStoreMergeCandidates(
18039 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
18040 SDNode *&RootNode) {
18041 // This holds the base pointer, index, and the offset in bytes from the base
18042 // pointer. We must have a base and an offset. Do not handle stores to undef
18044 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
18045 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
18048 SDValue Val = peekThroughBitcasts(St->getValue());
18049 StoreSource StoreSrc = getStoreSource(Val);
18050 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
18052 // Match on loadbaseptr if relevant.
18053 EVT MemVT = St->getMemoryVT();
18054 BaseIndexOffset LBasePtr;
18056 if (StoreSrc == StoreSource::Load) {
18057 auto *Ld = cast<LoadSDNode>(Val);
18058 LBasePtr = BaseIndexOffset::match(Ld, DAG);
18059 LoadVT = Ld->getMemoryVT();
18060 // Load and store should be the same type.
18061 if (MemVT != LoadVT)
18063 // Loads must only have one use.
18064 if (!Ld->hasNUsesOfValue(1, 0))
18066 // The memory operands must not be volatile/indexed/atomic.
18067 // TODO: May be able to relax for unordered atomics (see D66309)
18068 if (!Ld->isSimple() || Ld->isIndexed())
// Predicate deciding whether Other can be merged with St; on success also
// returns Other's base pointer and its byte offset from St's base.
18071 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
18072 int64_t &Offset) -> bool {
18073 // The memory operands must not be volatile/indexed/atomic.
18074 // TODO: May be able to relax for unordered atomics (see D66309)
18075 if (!Other->isSimple() || Other->isIndexed())
18077 // Don't mix temporal stores with non-temporal stores.
18078 if (St->isNonTemporal() != Other->isNonTemporal())
18080 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
18081 // Allow merging constants of different types as integers.
18082 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
18083 : Other->getMemoryVT() != MemVT;
18084 switch (StoreSrc) {
18085 case StoreSource::Load: {
18088 // The Load's Base Ptr must also match.
18089 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC)
18092 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
18093 if (LoadVT != OtherLd->getMemoryVT())
18095 // Loads must only have one use.
18096 if (!OtherLd->hasNUsesOfValue(1, 0))
18098 // The memory operands must not be volatile/indexed/atomic.
18099 // TODO: May be able to relax for unordered atomics (see D66309)
18100 if (!OtherLd->isSimple() || OtherLd->isIndexed())
18102 // Don't mix temporal loads with non-temporal loads.
18103 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
18105 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
18109 case StoreSource::Constant:
18112 if (!isIntOrFPConstant(OtherBC))
18115 case StoreSource::Extract:
18116 // Do not merge truncated stores here.
18117 if (Other->isTruncatingStore())
18119 if (!MemVT.bitsEq(OtherBC.getValueType()))
18121 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
18122 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18126 llvm_unreachable("Unhandled store source for merging")
18128 Ptr = BaseIndexOffset::match(Other, DAG);
18129 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
18132 // Check if the pair of StoreNode and the RootNode already bail out many
18133 // times which is over the limit in dependence check.
18134 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
18135 SDNode *RootNode) -> bool {
18136 auto RootCount = StoreRootCountMap.find(StoreNode);
18137 return RootCount != StoreRootCountMap.end() &&
18138 RootCount->second.first == RootNode &&
18139 RootCount->second.second > StoreMergeDependenceLimit;
// Try to record a chain user of the root as a merge candidate.
18142 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
18143 // This must be a chain use.
18144 if (UseIter.getOperandNo() != 0)
18146 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
18147 BaseIndexOffset Ptr;
18149 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
18150 !OverLimitInDependenceCheck(OtherStore, RootNode))
18151 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
18155 // We are looking for a root node which is an ancestor to all mergable
18156 // stores. We search up through a load, to our root and then down
18157 // through all children. For instance we will find Store{1,2,3} if
18158 // St is Store1, Store2, or Store3 where the root is not a load
18159 // which is always true for nonvolatile ops. TODO: Expand
18160 // the search to find all valid candidates through multiple layers of loads.
18163 // |-------|-------|
18164 // Load Load Store3
18168 // FIXME: We should be able to climb and
18169 // descend TokenFactors to find candidates as well.
18171 RootNode = St->getChain().getNode();
18173 unsigned NumNodesExplored = 0;
18174 const unsigned MaxSearchNodes = 1024;
18175 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
// If the chain comes from a load, climb one level up to the load's chain
// so stores through sibling loads can be found.
18176 RootNode = Ldn->getChain().getNode();
18177 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
18178 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
18179 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
18180 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
18181 TryToAddCandidate(I2);
18183 // Check stores that depend on the root (e.g. Store 3 in the chart above).
18184 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
18185 TryToAddCandidate(I);
18189 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
18190 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
18191 TryToAddCandidate(I);
18195 // We need to check that merging these stores does not cause a loop in the
18196 // DAG. Any store candidate may depend on another candidate indirectly through
18197 // its operands. Check in parallel by searching up from operands of candidates.
// Returns true when it is safe to merge the first NumStores entries of
// StoreNodes (no candidate store is a predecessor of another candidate);
// false when a dependency is found or the bounded search gives up.
// NOTE(review): this chunk view elides some original lines, so statements
// below may not be contiguous in the real file.
18198 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
18199 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
18200 SDNode *RootNode) {
18201 // FIXME: We should be able to truncate a full search of
18202 // predecessors by doing a BFS and keeping tabs the originating
18203 // stores from which worklist nodes come from in a similar way to
18204 // TokenFactor simplfication.
18206 SmallPtrSet<const SDNode *, 32> Visited;
18207 SmallVector<const SDNode *, 8> Worklist;
18209 // RootNode is a predecessor to all candidates so we need not search
18210 // past it. Add RootNode (peeking through TokenFactors). Do not count
18211 // these towards size check.
18213 Worklist.push_back(RootNode);
18214 while (!Worklist.empty()) {
18215 auto N = Worklist.pop_back_val();
18216 if (!Visited.insert(N).second)
18217 continue; // Already present in Visited.
// Expand TokenFactors so their operands also become pruning roots; this is
// the "peeking through TokenFactors" mentioned above.
18218 if (N->getOpcode() == ISD::TokenFactor) {
18219 for (SDValue Op : N->ops())
18220 Worklist.push_back(Op.getNode());
18224 // Don't count pruning nodes towards max.
18225 unsigned int Max = 1024 + Visited.size();
18226 // Search Ops of store candidates.
18227 for (unsigned i = 0; i < NumStores; ++i) {
18228 SDNode *N = StoreNodes[i].MemNode;
18229 // Of the 4 Store Operands:
18230 // * Chain (Op 0) -> We have already considered these
18231 // in candidate selection, but only by following the
18232 // chain dependencies. We could still have a chain
18233 // dependency to a load, that has a non-chain dep to
18234 // another load, that depends on a store, etc. So it is
18235 // possible to have dependencies that consist of a mix
18236 // of chain and non-chain deps, and we need to include
18237 // chain operands in the analysis here..
18238 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
18239 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
18240 // but aren't necessarily fromt the same base node, so
18241 // cycles possible (e.g. via indexed store).
18242 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
18243 // non-indexed stores). Not constant on all targets (e.g. ARM)
18244 // and so can participate in a cycle.
// Seed the upward search with every operand of every candidate store.
18245 for (unsigned j = 0; j < N->getNumOperands(); ++j)
18246 Worklist.push_back(N->getOperand(j).getNode());
18248 // Search through DAG. We can stop early if we find a store node.
18249 for (unsigned i = 0; i < NumStores; ++i)
18250 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
18252 // If the searching bail out, record the StoreNode and RootNode in the
18253 // StoreRootCountMap. If we have seen the pair many times over a limit,
18254 // we won't add the StoreNode into StoreNodes set again.
18255 if (Visited.size() >= Max) {
// Count bailouts per (store, root) pair so repeated failures eventually
// disqualify this store from candidacy (see OverLimitInDependenceCheck).
18256 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
18257 if (RootCount.first == RootNode)
18258 RootCount.second++;
18260 RootCount = {RootNode, 1};
// Finds the length of the longest run of stores at the front of the sorted
// StoreNodes list whose offsets are exactly ElementSizeBytes apart, trimming
// overlapping / non-consecutive leading entries from StoreNodes as it goes.
// NOTE(review): some original lines are elided in this view (e.g. the loop
// increments and early returns between the visible statements).
18268 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
18269 int64_t ElementSizeBytes) const {
18271 // Find a store past the width of the first store.
18272 size_t StartIdx = 0;
18273 while ((StartIdx + 1 < StoreNodes.size()) &&
18274 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
18275 StoreNodes[StartIdx + 1].OffsetFromBase)
18278 // Bail if we don't have enough candidates to merge.
18279 if (StartIdx + 1 >= StoreNodes.size())
18282 // Trim stores that overlapped with the first store.
18284 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
18286 // Scan the memory operations on the chain and find the first
18287 // non-consecutive store memory address.
18288 unsigned NumConsecutiveStores = 1;
18289 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
18290 // Check that the addresses are consecutive starting from the second
18291 // element in the list of stores.
18292 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
18293 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
// Offsets must grow by exactly ElementSizeBytes per element to count as
// consecutive.
18294 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18296 NumConsecutiveStores = i + 1;
18298 if (NumConsecutiveStores > 1)
18299 return NumConsecutiveStores;
18301 // There are no consecutive stores at the start of the list.
18302 // Remove the first store and try again.
18303 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
// Attempts to merge runs of consecutive constant stores (integer or FP
// constants) into a single wider store, a truncating store, or a vector
// store. Returns true if any merge was performed. Consumes merged or
// rejected entries from the front of StoreNodes each iteration.
// NOTE(review): some original lines are elided in this view.
18307 bool DAGCombiner::tryStoreMergeOfConstants(
18308 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18309 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
18310 LLVMContext &Context = *DAG.getContext();
18311 const DataLayout &DL = DAG.getDataLayout();
18312 int64_t ElementSizeBytes = MemVT.getStoreSize();
18313 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18314 bool MadeChange = false;
18316 // Store the constants into memory as one consecutive store.
18317 while (NumConsecutiveStores >= 2) {
18318 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18319 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18320 Align FirstStoreAlign = FirstInChain->getAlign();
18321 unsigned LastLegalType = 1;
18322 unsigned LastLegalVectorType = 1;
18323 bool LastIntegerTrunc = false;
18324 bool NonZero = false;
// Index of the first zero element that appears after a non-zero one; used
// below to limit how many candidates can be skipped on failure.
18325 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
18326 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18327 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
18328 SDValue StoredVal = ST->getValue();
18329 bool IsElementZero = false;
18330 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
18331 IsElementZero = C->isZero();
18332 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
18333 IsElementZero = C->getConstantFPValue()->isNullValue();
18334 if (IsElementZero) {
18335 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
18336 FirstZeroAfterNonZero = i;
18338 NonZero |= !IsElementZero;
18340 // Find a legal type for the constant store.
18341 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18342 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18343 bool IsFast = false;
18345 // Break early when size is too large to be legal.
18346 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
// Prefer a plain legal integer store of the combined width...
18349 if (TLI.isTypeLegal(StoreTy) &&
18350 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18351 DAG.getMachineFunction()) &&
18352 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18353 *FirstInChain->getMemOperand(), &IsFast) &&
18355 LastIntegerTrunc = false;
18356 LastLegalType = i + 1;
18357 // Or check whether a truncstore is legal.
18358 } else if (TLI.getTypeAction(Context, StoreTy) ==
18359 TargetLowering::TypePromoteInteger) {
18360 EVT LegalizedStoredValTy =
18361 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
18362 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18363 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18364 DAG.getMachineFunction()) &&
18365 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18366 *FirstInChain->getMemOperand(), &IsFast) &&
18368 LastIntegerTrunc = true;
18369 LastLegalType = i + 1;
18373 // We only use vectors if the constant is known to be zero or the
18374 // target allows it and the function is not marked with the
18375 // noimplicitfloat attribute.
18377 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
18379 // Find a legal type for the vector store.
18380 unsigned Elts = (i + 1) * NumMemElts;
18381 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18382 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
18383 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18384 TLI.allowsMemoryAccess(Context, DL, Ty,
18385 *FirstInChain->getMemOperand(), &IsFast) &&
18387 LastLegalVectorType = i + 1;
// Vector form wins only when it covers strictly more stores and vectors
// are permitted for this function.
18391 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
18392 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
18393 bool UseTrunc = LastIntegerTrunc && !UseVector;
18395 // Check if we found a legal integer type that creates a meaningful
18398 // We know that candidate stores are in order and of correct
18399 // shape. While there is no mergeable sequence from the
18400 // beginning one may start later in the sequence. The only
18401 // reason a merge of size N could have failed where another of
18402 // the same size would not have, is if the alignment has
18403 // improved or we've dropped a non-zero value. Drop as many
18404 // candidates as we can here.
18405 unsigned NumSkip = 1;
18406 while ((NumSkip < NumConsecutiveStores) &&
18407 (NumSkip < FirstZeroAfterNonZero) &&
18408 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18411 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18412 NumConsecutiveStores -= NumSkip;
18416 // Check that we can merge these candidates without causing a cycle.
18417 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
// Dependency cycle detected: discard this run and try the remainder.
18419 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18420 NumConsecutiveStores -= NumElem;
18424 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
18425 /*IsConstantSrc*/ true,
18426 UseVector, UseTrunc);
18428 // Remove merged stores for next iteration.
18429 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18430 NumConsecutiveStores -= NumElem;
// Attempts to merge consecutive stores whose values are extracted vector
// elements/subvectors into a single wide vector store. Returns true if any
// merge was performed. NOTE(review): some original lines are elided here.
18435 bool DAGCombiner::tryStoreMergeOfExtracts(
18436 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18437 EVT MemVT, SDNode *RootNode) {
18438 LLVMContext &Context = *DAG.getContext();
18439 const DataLayout &DL = DAG.getDataLayout();
18440 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18441 bool MadeChange = false;
18443 // Loop on Consecutive Stores on success.
18444 while (NumConsecutiveStores >= 2) {
18445 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18446 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18447 Align FirstStoreAlign = FirstInChain->getAlign();
18448 unsigned NumStoresToMerge = 1;
// Grow the run one store at a time, remembering the widest vector type the
// target accepts for a merged store.
18449 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18450 // Find a legal type for the vector store.
18451 unsigned Elts = (i + 1) * NumMemElts;
18452 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
18453 bool IsFast = false;
18455 // Break early when size is too large to be legal.
18456 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
18459 if (TLI.isTypeLegal(Ty) &&
18460 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18461 TLI.allowsMemoryAccess(Context, DL, Ty,
18462 *FirstInChain->getMemOperand(), &IsFast) &&
18464 NumStoresToMerge = i + 1;
18467 // Check if we found a legal integer type creating a meaningful
18469 if (NumStoresToMerge < 2) {
18470 // We know that candidate stores are in order and of correct
18471 // shape. While there is no mergeable sequence from the
18472 // beginning one may start later in the sequence. The only
18473 // reason a merge of size N could have failed where another of
18474 // the same size would not have, is if the alignment has
18475 // improved. Drop as many candidates as we can here.
18476 unsigned NumSkip = 1;
18477 while ((NumSkip < NumConsecutiveStores) &&
18478 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18481 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18482 NumConsecutiveStores -= NumSkip;
18486 // Check that we can merge these candidates without causing a cycle.
18487 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
// Dependency cycle detected: discard this run and continue with the rest.
18489 StoreNodes.erase(StoreNodes.begin(),
18490 StoreNodes.begin() + NumStoresToMerge);
18491 NumConsecutiveStores -= NumStoresToMerge;
18495 MadeChange |= mergeStoresOfConstantsOrVecElts(
18496 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
18497 /*UseVector*/ true, /*UseTrunc*/ false);
18499 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
18500 NumConsecutiveStores -= NumStoresToMerge;
// Attempts to merge consecutive stores whose stored values come from
// consecutive loads into a single wide load + wide store pair (vector,
// integer, or truncstore/extload form), including a special rotate case for
// a reversed pair of loads. Returns true if any merge was performed.
// NOTE(review): some original lines are elided in this view.
18505 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
18506 unsigned NumConsecutiveStores, EVT MemVT,
18507 SDNode *RootNode, bool AllowVectors,
18508 bool IsNonTemporalStore,
18509 bool IsNonTemporalLoad) {
18510 LLVMContext &Context = *DAG.getContext();
18511 const DataLayout &DL = DAG.getDataLayout();
18512 int64_t ElementSizeBytes = MemVT.getStoreSize();
18513 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18514 bool MadeChange = false;
18516 // Look for load nodes which are used by the stored values.
18517 SmallVector<MemOpLink, 8> LoadNodes;
18519 // Find acceptable loads. Loads need to have the same chain (token factor),
18520 // must not be zext, volatile, indexed, and they must be consecutive.
18521 BaseIndexOffset LdBasePtr;
18523 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18524 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18525 SDValue Val = peekThroughBitcasts(St->getValue());
18526 LoadSDNode *Ld = cast<LoadSDNode>(Val);
18528 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
18529 // If this is not the first ptr that we check.
18530 int64_t LdOffset = 0;
18531 if (LdBasePtr.getBase().getNode()) {
18532 // The base ptr must be the same.
18533 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18536 // Check that all other base pointers are the same as this one.
18540 // We found a potential memory operand to merge.
18541 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18544 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18545 Align RequiredAlignment;
18546 bool NeedRotate = false;
18547 if (LoadNodes.size() == 2) {
18548 // If we have load/store pair instructions and we only have two values,
18549 // don't bother merging.
18550 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18551 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18552 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18553 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18556 // If the loads are reversed, see if we can rotate the halves into place.
18557 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18558 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18559 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18560 if (Offset0 - Offset1 == ElementSizeBytes &&
18561 (hasOperation(ISD::ROTL, PairVT) ||
18562 hasOperation(ISD::ROTR, PairVT))) {
// Put the loads into ascending-offset order; the half-swap is undone
// later by emitting a rotate of the merged value (see NeedRotate use).
18563 std::swap(LoadNodes[0], LoadNodes[1]);
18567 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18568 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18569 Align FirstStoreAlign = FirstInChain->getAlign();
18570 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18572 // Scan the memory operations on the chain and find the first
18573 // non-consecutive load memory address. These variables hold the index in
18574 // the store node array.
18576 unsigned LastConsecutiveLoad = 1;
18578 // This variable refers to the size and not index in the array.
18579 unsigned LastLegalVectorType = 1;
18580 unsigned LastLegalIntegerType = 1;
18581 bool isDereferenceable = true;
18582 bool DoIntegerTruncate = false;
18583 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18584 SDValue LoadChain = FirstLoad->getChain();
18585 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18586 // All loads must share the same chain.
18587 if (LoadNodes[i].MemNode->getChain() != LoadChain)
18590 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18591 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18593 LastConsecutiveLoad = i;
// The merged load is only dereferenceable if every source load is.
18595 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18596 isDereferenceable = false;
18598 // Find a legal type for the vector store.
18599 unsigned Elts = (i + 1) * NumMemElts;
18600 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18602 // Break early when size is too large to be legal.
18603 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18606 bool IsFastSt = false;
18607 bool IsFastLd = false;
18608 // Don't try vector types if we need a rotate. We may still fail the
18609 // legality checks for the integer type, but we can't handle the rotate
18610 // case with vectors.
18611 // FIXME: We could use a shuffle in place of the rotate.
18612 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18613 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18614 DAG.getMachineFunction()) &&
18615 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18616 *FirstInChain->getMemOperand(), &IsFastSt) &&
18618 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18619 *FirstLoad->getMemOperand(), &IsFastLd) &&
18621 LastLegalVectorType = i + 1;
18624 // Find a legal type for the integer store.
18625 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18626 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18627 if (TLI.isTypeLegal(StoreTy) &&
18628 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18629 DAG.getMachineFunction()) &&
18630 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18631 *FirstInChain->getMemOperand(), &IsFastSt) &&
18633 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18634 *FirstLoad->getMemOperand(), &IsFastLd) &&
18636 LastLegalIntegerType = i + 1;
18637 DoIntegerTruncate = false;
18638 // Or check whether a truncstore and extload is legal.
18639 } else if (TLI.getTypeAction(Context, StoreTy) ==
18640 TargetLowering::TypePromoteInteger) {
18641 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
// All three extension kinds must be legal because the extload emitted
// below uses ISD::EXTLOAD and legalization may pick any of them.
18642 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18643 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18644 DAG.getMachineFunction()) &&
18645 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18646 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18647 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18648 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18649 *FirstInChain->getMemOperand(), &IsFastSt) &&
18651 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18652 *FirstLoad->getMemOperand(), &IsFastLd) &&
18654 LastLegalIntegerType = i + 1;
18655 DoIntegerTruncate = true;
18660 // Only use vector types if the vector type is larger than the integer
18661 // type. If they are the same, use integers.
18663 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18664 unsigned LastLegalType =
18665 std::max(LastLegalVectorType, LastLegalIntegerType);
18667 // We add +1 here because the LastXXX variables refer to location while
18668 // the NumElem refers to array/index size.
18669 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18670 NumElem = std::min(LastLegalType, NumElem);
18671 Align FirstLoadAlign = FirstLoad->getAlign();
18674 // We know that candidate stores are in order and of correct
18675 // shape. While there is no mergeable sequence from the
18676 // beginning one may start later in the sequence. The only
18677 // reason a merge of size N could have failed where another of
18678 // the same size would not have is if the alignment or either
18679 // the load or store has improved. Drop as many candidates as we
18681 unsigned NumSkip = 1;
18682 while ((NumSkip < LoadNodes.size()) &&
18683 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18684 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18686 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18687 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18688 NumConsecutiveStores -= NumSkip;
18692 // Check that we can merge these candidates without causing a cycle.
18693 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
// Dependency cycle detected: drop this run and continue with the rest.
18695 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18696 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18697 NumConsecutiveStores -= NumElem;
18701 // Find if it is better to use vectors or integers to load and store
18705 // Find a legal type for the vector store.
18706 unsigned Elts = NumElem * NumMemElts;
18707 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18709 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18710 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18713 SDLoc LoadDL(LoadNodes[0].MemNode);
18714 SDLoc StoreDL(StoreNodes[0].MemNode);
18716 // The merged loads are required to have the same incoming chain, so
18717 // using the first's chain is acceptable.
18719 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18720 AddToWorklist(NewStoreChain.getNode());
18722 MachineMemOperand::Flags LdMMOFlags =
18723 isDereferenceable ? MachineMemOperand::MODereferenceable
18724 : MachineMemOperand::MONone;
18725 if (IsNonTemporalLoad)
18726 LdMMOFlags |= MachineMemOperand::MONonTemporal;
18728 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18729 ? MachineMemOperand::MONonTemporal
18730 : MachineMemOperand::MONone;
18732 SDValue NewLoad, NewStore;
18733 if (UseVectorTy || !DoIntegerTruncate) {
18734 NewLoad = DAG.getLoad(
18735 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18736 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18737 SDValue StoreOp = NewLoad;
18739 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18740 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18741 "Unexpected type for rotate-able load pair");
18743 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18744 // Target can convert to the identical ROTR if it does not have ROTL.
// Rotate by half the width to swap the two halves back into the order
// the original (reversed) loads required.
18745 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18747 NewStore = DAG.getStore(
18748 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18749 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18750 } else { // This must be the truncstore/extload case
18752 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18753 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18754 FirstLoad->getChain(), FirstLoad->getBasePtr(),
18755 FirstLoad->getPointerInfo(), JointMemOpVT,
18756 FirstLoadAlign, LdMMOFlags);
18757 NewStore = DAG.getTruncStore(
18758 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18759 FirstInChain->getPointerInfo(), JointMemOpVT,
18760 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18763 // Transfer chain users from old loads to the new load.
18764 for (unsigned i = 0; i < NumElem; ++i) {
18765 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18766 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18767 SDValue(NewLoad.getNode(), 1));
18770 // Replace all stores with the new store. Recursively remove corresponding
18771 // values if they are no longer used.
18772 for (unsigned i = 0; i < NumElem; ++i) {
18773 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18774 CombineTo(StoreNodes[i].MemNode, NewStore);
18775 if (Val->use_empty())
18776 recursivelyDeleteUnusedNodes(Val.getNode());
18780 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18781 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18782 NumConsecutiveStores -= NumElem;
// Top-level driver for store merging: gathers merge candidates reachable
// from St, sorts them by offset, and repeatedly dispatches runs of
// consecutive stores to the source-specific helpers (constants, extracted
// vector elements, or loads). Returns true if any stores were merged.
// NOTE(review): some original lines (early returns etc.) are elided here.
18787 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18788 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18791 // TODO: Extend this function to merge stores of scalable vectors.
18792 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18793 // store since we know <vscale x 16 x i8> is exactly twice as large as
18794 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18795 EVT MemVT = St->getMemoryVT();
18796 if (MemVT.isScalableVector())
18798 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18801 // This function cannot currently deal with non-byte-sized memory sizes.
18802 int64_t ElementSizeBytes = MemVT.getStoreSize();
18803 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18806 // Do not bother looking at stored values that are not constants, loads, or
18807 // extracted vector elements.
18808 SDValue StoredVal = peekThroughBitcasts(St->getValue());
18809 const StoreSource StoreSrc = getStoreSource(StoredVal);
18810 if (StoreSrc == StoreSource::Unknown)
18813 SmallVector<MemOpLink, 8> StoreNodes;
18815 // Find potential store merge candidates by searching through chain sub-DAG
18816 getStoreMergeCandidates(St, StoreNodes, RootNode);
18818 // Check if there is anything to merge.
18819 if (StoreNodes.size() < 2)
18822 // Sort the memory operands according to their distance from the
18824 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18825 return LHS.OffsetFromBase < RHS.OffsetFromBase;
// Vector merges are disallowed under noimplicitfloat.
18828 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18829 Attribute::NoImplicitFloat);
18830 bool IsNonTemporalStore = St->isNonTemporal();
18831 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18832 cast<LoadSDNode>(StoredVal)->isNonTemporal();
18834 // Store Merge attempts to merge the lowest stores. This generally
18835 // works out as if successful, as the remaining stores are checked
18836 // after the first collection of stores is merged. However, in the
18837 // case that a non-mergeable store is found first, e.g., {p[-2],
18838 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18839 // mergeable cases. To prevent this, we prune such stores from the
18840 // front of StoreNodes here.
18841 bool MadeChange = false;
18842 while (StoreNodes.size() > 1) {
18843 unsigned NumConsecutiveStores =
18844 getConsecutiveStores(StoreNodes, ElementSizeBytes);
18845 // There are no more stores in the list to examine.
18846 if (NumConsecutiveStores == 0)
18849 // We have at least 2 consecutive stores. Try to merge them.
18850 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18851 switch (StoreSrc) {
18852 case StoreSource::Constant:
18853 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18854 MemVT, RootNode, AllowVectors);
18857 case StoreSource::Extract:
18858 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18862 case StoreSource::Load:
18863 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18864 MemVT, RootNode, AllowVectors,
18865 IsNonTemporalStore, IsNonTemporalLoad);
18869 llvm_unreachable("Unhandled store source type");
// Re-creates ST on top of BetterChain (preserving truncating-store-ness and
// the memory operand) and ties the old and new chains together with a
// TokenFactor so neither ordering is lost. Returns the combined result via
// CombineTo. NOTE(review): declarations of SL/ReplStore are elided in this
// chunk view.
18875 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18879 // Replace the chain to avoid dependency.
18880 if (ST->isTruncatingStore()) {
18881 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18882 ST->getBasePtr(), ST->getMemoryVT(),
18883 ST->getMemOperand());
18885 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18886 ST->getMemOperand());
18889 // Create token to keep both nodes around.
18890 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18891 MVT::Other, ST->getChain(), ReplStore);
18893 // Make sure the new and old chains are cleaned up.
18894 AddToWorklist(Token.getNode());
18896 // Don't add users to work list.
18897 return CombineTo(ST, Token, false);
// Turns a store of an FP constant into the equivalent integer store(s) of
// its bit pattern (f32 -> one i32 store; f64 -> one i64 store, or two i32
// stores when i64 stores are not available). Returns the replacement store
// chain, or presumably an empty SDValue on the elided bail-out paths.
// NOTE(review): several original lines (early returns, f64 case label,
// closing braces) are elided in this chunk view.
18900 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18901 SDValue Value = ST->getValue();
// TargetConstantFP must stay as-is; it is already in final form.
18902 if (Value.getOpcode() == ISD::TargetConstantFP)
18905 if (!ISD::isNormalStore(ST))
18910 SDValue Chain = ST->getChain();
18911 SDValue Ptr = ST->getBasePtr();
18913 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18915 // NOTE: If the original store is volatile, this transform must not increase
18916 // the number of stores. For example, on x86-32 an f64 can be stored in one
18917 // processor operation but an i64 (which is not legal) requires two. So the
18918 // transform should not be done in this case.
18921 switch (CFP->getSimpleValueType(0).SimpleTy) {
18923 llvm_unreachable("Unknown FP type");
18924 case MVT::f16: // We don't do this for these yet.
18931 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18932 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Reinterpret the f32 bits as an i32 constant and store that instead.
18933 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18934 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18936 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18941 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18943 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
// Reinterpret the f64 bits as a single i64 constant store.
18944 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18945 getZExtValue(), SDLoc(CFP), MVT::i64);
18946 return DAG.getStore(Chain, DL, Tmp,
18947 Ptr, ST->getMemOperand());
18950 if (ST->isSimple() &&
18951 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18952 // Many FP stores are not made apparent until after legalize, e.g. for
18953 // argument passing. Since this is so common, custom legalize the
18954 // 64-bit integer store into two 32-bit stores.
18955 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18956 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18957 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
// On big-endian targets the high word goes at the lower address.
18958 if (DAG.getDataLayout().isBigEndian())
18961 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18962 AAMDNodes AAInfo = ST->getAAInfo();
18964 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18965 ST->getOriginalAlign(), MMOFlags, AAInfo);
18966 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18967 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18968 ST->getPointerInfo().getWithOffset(4),
18969 ST->getOriginalAlign(), MMOFlags, AAInfo);
// Join the two halves so both stores are kept ordered on the chain.
18970 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
// visitSTORE - Combine a STORE node.  The visible transforms, in order:
//  * forward a BITCAST operand into the store when the target reports the
//    re-typed store beneficial (simple stores pre-legalization, or when the
//    re-typed store is legal);
//  * drop stores of undef;
//  * refine alignment via DAG.InferPtrAlign (refines N in place);
//  * try FP<->int load/store pairing and truncstore (BSWAP) merging;
//  * find better neighbor chains for unindexed stores;
//  * simplify truncating stores: peel extensions, SimplifyDemandedBits on the
//    stored value, SimplifyMultipleUseDemandedBits, shrink stored constants;
//  * delete dead stores (store of a value just loaded from the same address,
//    or a preceding store wholly overwritten by this one);
//  * fold FP_ROUND/TRUNCATE into a truncating store;
//  * merge consecutive stores, form pre/post-indexed stores, replace stores
//    of FP constants, split merged values; finally ReduceLoadOpStoreWidth.
18978 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18979 StoreSDNode *ST = cast<StoreSDNode>(N);
18980 SDValue Chain = ST->getChain();
18981 SDValue Value = ST->getValue();
18982 SDValue Ptr = ST->getBasePtr();
18984 // If this is a store of a bit convert, store the input value if the
18985 // resultant store does not need a higher alignment than the original.
18986 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18987 ST->isUnindexed()) {
18988 EVT SVT = Value.getOperand(0).getValueType();
18989 // If the store is volatile, we only want to change the store type if the
18990 // resulting store is legal. Otherwise we might increase the number of
18991 // memory accesses. We don't care if the original type was legal or not
18992 // as we assume software couldn't rely on the number of accesses of an
18994 // TODO: May be able to relax for unordered atomics (see D66309)
18995 if (((!LegalOperations && ST->isSimple()) ||
18996 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18997 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18998 DAG, *ST->getMemOperand())) {
18999 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
19000 ST->getMemOperand());
19004 // Turn 'store undef, Ptr' -> nothing.
19005 if (Value.isUndef() && ST->isUnindexed())
19008 // Try to infer better alignment information than the store already has.
19009 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
19010 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19011 if (*Alignment > ST->getAlign() &&
19012 isAligned(*Alignment, ST->getSrcValueOffset())) {
19014 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
19015 ST->getMemoryVT(), *Alignment,
19016 ST->getMemOperand()->getFlags(), ST->getAAInfo());
19017 // NewStore will always be N as we are only refining the alignment
19018 assert(NewStore.getNode() == N);
19024 // Try transforming a pair floating point load / store ops to integer
19025 // load / store ops.
19026 if (SDValue NewST = TransformFPLoadStorePair(N))
19029 // Try transforming several stores into STORE (BSWAP).
19030 if (SDValue Store = mergeTruncStores(ST))
19033 if (ST->isUnindexed()) {
19034 // Walk up chain skipping non-aliasing memory nodes, on this store and any
19035 // adjacent stores.
19036 if (findBetterNeighborChains(ST)) {
19037 // replaceStoreChain uses CombineTo, which handled all of the worklist
19038 // manipulation. Return the original node to not do anything else.
19039 return SDValue(ST, 0);
// Re-read the chain: findBetterNeighborChains may have replaced it.
19041 Chain = ST->getChain();
19044 // FIXME: is there such a thing as a truncating indexed store?
19045 if (ST->isTruncatingStore() && ST->isUnindexed() &&
19046 Value.getValueType().isInteger() &&
19047 (!isa<ConstantSDNode>(Value) ||
19048 !cast<ConstantSDNode>(Value)->isOpaque())) {
19049 // Convert a truncating store of an extension into a standard store.
19050 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
19051 Value.getOpcode() == ISD::SIGN_EXTEND ||
19052 Value.getOpcode() == ISD::ANY_EXTEND) &&
19053 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
19054 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
19055 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
19056 ST->getMemOperand());
// Only the low MemoryVT bits of Value survive a truncating store.
19058 APInt TruncDemandedBits =
19059 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
19060 ST->getMemoryVT().getScalarSizeInBits());
19062 // See if we can simplify the operation with SimplifyDemandedBits, which
19063 // only works if the value has a single use.
19064 AddToWorklist(Value.getNode());
19065 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
19066 // Re-visit the store if anything changed and the store hasn't been merged
19067 // with another node (N is deleted) SimplifyDemandedBits will add Value's
19068 // node back to the worklist if necessary, but we also need to re-visit
19069 // the Store node itself.
19070 if (N->getOpcode() != ISD::DELETED_NODE)
19072 return SDValue(N, 0);
19075 // Otherwise, see if we can simplify the input to this truncstore with
19076 // knowledge that only the low bits are being used. For example:
19077 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
19078 if (SDValue Shorter =
19079 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
19080 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
19081 ST->getMemOperand());
19083 // If we're storing a truncated constant, see if we can simplify it.
19084 // TODO: Move this to targetShrinkDemandedConstant?
19085 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
19086 if (!Cst->isOpaque()) {
19087 const APInt &CValue = Cst->getAPIntValue();
19088 APInt NewVal = CValue & TruncDemandedBits;
19089 if (NewVal != CValue) {
19091 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
19092 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
19093 ST->getMemoryVT(), ST->getMemOperand());
19098 // If this is a load followed by a store to the same location, then the store
19100 // TODO: Can relax for unordered atomics (see D66309)
19101 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
19102 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
19103 ST->isUnindexed() && ST->isSimple() &&
19104 Ld->getAddressSpace() == ST->getAddressSpace() &&
19105 // There can't be any side effects between the load and store, such as
19106 // a call or store.
19107 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
19108 // The store is dead, remove it.
19113 // TODO: Can relax for unordered atomics (see D66309)
19114 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
19115 if (ST->isUnindexed() && ST->isSimple() &&
19116 ST1->isUnindexed() && ST1->isSimple()) {
19117 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
19118 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
19119 ST->getAddressSpace() == ST1->getAddressSpace()) {
19120 // If this is a store followed by a store with the same value to the
19121 // same location, then the store is dead/noop.
19125 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
19126 !ST1->getBasePtr().isUndef() &&
19127 // BaseIndexOffset and the code below requires knowing the size
19128 // of a vector, so bail out if MemoryVT is scalable.
19129 !ST->getMemoryVT().isScalableVector() &&
19130 !ST1->getMemoryVT().isScalableVector() &&
19131 ST->getAddressSpace() == ST1->getAddressSpace()) {
19132 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
19133 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
19134 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
19135 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
19136 // If this is a store whose preceding store is to a subset of the current
19137 // location and no other node is chained to that store, we can
19138 // effectively drop the store. Do not remove stores to undef as they may
19139 // be used as data sinks.
19140 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
19141 CombineTo(ST1, ST1->getChain());
19148 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
19149 // truncating store. We can do this even if this is already a truncstore.
19150 if ((Value.getOpcode() == ISD::FP_ROUND ||
19151 Value.getOpcode() == ISD::TRUNCATE) &&
19152 Value->hasOneUse() && ST->isUnindexed() &&
19153 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
19154 ST->getMemoryVT(), LegalOperations)) {
19155 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
19156 Ptr, ST->getMemoryVT(), ST->getMemOperand());
19159 // Always perform this optimization before types are legal. If the target
19160 // prefers, also try this after legalization to catch stores that were created
19161 // by intrinsics or other nodes.
19162 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
19164 // There can be multiple store sequences on the same chain.
19165 // Keep trying to merge store sequences until we are unable to do so
19166 // or until we merge the last store on the chain.
19167 bool Changed = mergeConsecutiveStores(ST);
19168 if (!Changed) break;
19169 // Return N as merge only uses CombineTo and no worklist clean
19170 // up is necessary.
19171 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
19172 return SDValue(N, 0);
19176 // Try transforming N to an indexed store.
19177 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19178 return SDValue(N, 0);
19180 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
19182 // Make sure to do this only after attempting to merge stores in order to
19183 // avoid changing the types of some subset of stores due to visit order,
19184 // preventing their merging.
19185 if (isa<ConstantFPSDNode>(ST->getValue())) {
19186 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
19190 if (SDValue NewSt = splitMergedValStore(ST))
19193 return ReduceLoadOpStoreWidth(N);
// visitLIFETIME_END - Remove stores that write purely within an object whose
// lifetime is about to end.  Walks single-use chains upward from the
// LIFETIME_END node, passing through TokenFactors and through lifetime
// markers proven not to alias N, and deletes any simple, unindexed,
// fixed-size store fully contained in the dying object's extent.
19196 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
19197 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
19198 if (!LifetimeEnd->hasOffset())
// Base/offset of the object whose lifetime ends (operand 1 is the frame
// index, with the marker's static offset).
19201 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
19202 LifetimeEnd->getOffset(), false);
19204 // We walk up the chains to find stores.
19205 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
19206 while (!Chains.empty()) {
19207 SDValue Chain = Chains.pop_back_val();
// Only traverse chain nodes whose sole user is this walk; a second user
// could still observe the store we would delete.
19208 if (!Chain.hasOneUse())
19210 switch (Chain.getOpcode()) {
19211 case ISD::TokenFactor:
19212 for (unsigned Nops = Chain.getNumOperands(); Nops;)
19213 Chains.push_back(Chain.getOperand(--Nops));
19215 case ISD::LIFETIME_START:
19216 case ISD::LIFETIME_END:
19217 // We can forward past any lifetime start/end that can be proven not to
19219 if (!mayAlias(Chain.getNode(), N))
19220 Chains.push_back(Chain.getOperand(0));
// NOTE(review): ST is dereferenced below without a null check; this relies
// on the enclosing switch case guaranteeing Chain is a store — confirm.
19223 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
19224 // TODO: Can relax for unordered atomics (see D66309)
19225 if (!ST->isSimple() || ST->isIndexed())
19227 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
19228 // The bounds of a scalable store are not known until runtime, so this
19229 // store cannot be elided.
19230 if (StoreSize.isScalable())
19232 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
19233 // If we store purely within object bounds just before its lifetime ends,
19234 // we can remove the store.
19235 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
19236 StoreSize.getFixedSize() * 8)) {
19237 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
19238 dbgs() << "\nwithin LIFETIME_END of : ";
19239 LifetimeEndBase.dump(); dbgs() << "\n");
19240 CombineTo(ST, ST->getChain());
19241 return SDValue(N, 0);
19249 /// For the instruction sequence of store below, F and I values
19250 /// are bundled together as an i64 value before being stored into memory.
19251 /// Sometimes it is more efficient to generate separate stores for F and I,
19252 /// which can remove the bitwise instructions or sink them to colder places.
19254 /// (store (or (zext (bitcast F to i32) to i64),
19255 /// (shl (zext I to i64), 32)), addr) -->
19256 /// (store F, addr) and (store I, addr+4)
19258 /// Similarly, splitting for other merged store can also be beneficial, like:
19259 /// For pair of {i32, i32}, i64 store --> two i32 stores.
19260 /// For pair of {i32, i16}, i64 store --> two i32 stores.
19261 /// For pair of {i16, i16}, i32 store --> two i16 stores.
19262 /// For pair of {i16, i8}, i32 store --> two i16 stores.
19263 /// For pair of {i8, i8}, i16 store --> two i8 stores.
19265 /// We allow each target to determine specifically which kind of splitting is
19268 /// The store patterns are commonly seen from the simple code snippet below
19269 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
19270 /// void goo(const std::pair<int, float> &);
19273 /// goo(std::make_pair(tmp, ftmp));
// splitMergedValStore - Split a store of (or (zext Lo), (shl (zext Hi),
// HalfValBitSize)) into two half-width stores, when the target reports that
// two stores are cheaper than merging the bits (see the doc comment above).
// Returns an SDValue on success, or a null SDValue when the pattern does not
// match or the target declines.
19277 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
19278 if (OptLevel == CodeGenOpt::None)
19281 // Can't change the number of memory accesses for a volatile store or break
19282 // atomicity for an atomic one.
19283 if (!ST->isSimple())
19286 SDValue Val = ST->getValue();
19289 // Match OR operand.
19290 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
19293 // Match SHL operand and get Lower and Higher parts of Val.
19294 SDValue Op1 = Val.getOperand(0);
19295 SDValue Op2 = Val.getOperand(1);
// OR is commutative: canonicalize so Op1 is the SHL side.
19297 if (Op1.getOpcode() != ISD::SHL) {
19298 std::swap(Op1, Op2);
19299 if (Op1.getOpcode() != ISD::SHL)
19303 Hi = Op1.getOperand(0);
19304 if (!Op1.hasOneUse())
19307 // Match shift amount to HalfValBitSize.
19308 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
19309 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
19310 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
19313 // Lo and Hi are zero-extended from int with size less equal than 32
19315 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
19316 !Lo.getOperand(0).getValueType().isScalarInteger() ||
19317 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
19318 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
19319 !Hi.getOperand(0).getValueType().isScalarInteger() ||
19320 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
19323 // Use the EVT of low and high parts before bitcast as the input
19324 // of target query.
19325 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
19326 ? Lo.getOperand(0).getValueType()
19327 : Lo.getValueType();
19328 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
19329 ? Hi.getOperand(0).getValueType()
19330 : Hi.getValueType();
19331 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
19334 // Start to split store.
19335 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
19336 AAMDNodes AAInfo = ST->getAAInfo();
19338 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
19339 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
19340 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
19341 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
19343 SDValue Chain = ST->getChain();
19344 SDValue Ptr = ST->getBasePtr();
19345 // Lower value store.
19346 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
19347 ST->getOriginalAlign(), MMOFlags, AAInfo);
// Advance the pointer by half the original width for the high half.
19348 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
19349 // Higher value store.
19350 SDValue St1 = DAG.getStore(
19351 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
19352 ST->getOriginalAlign(), MMOFlags, AAInfo);
19356 /// Convert a disguised subvector insertion into a shuffle:
// Fix: the assert below guards INSERT_VECTOR_ELT, but its message said
// "Expected extract_vector_elt" (copy-paste from the extract combine) —
// corrected so a failing assert reports the right node kind.
19357 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
19358 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
19359 "Expected insert_vector_elt");
19360 SDValue InsertVal = N->getOperand(1);
19361 SDValue Vec = N->getOperand(0);
19363 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
19365 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
19367 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
19368 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19369 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
19370 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
19371 ArrayRef<int> Mask = SVN->getMask();
19373 SDValue X = Vec.getOperand(0);
19374 SDValue Y = Vec.getOperand(1);
19376 // Vec's operand 0 is using indices from 0 to N-1 and
19377 // operand 1 from N to 2N - 1, where N is the number of
19378 // elements in the vectors.
19379 SDValue InsertVal0 = InsertVal.getOperand(0);
19380 int ElementOffset = -1;
19382 // We explore the inputs of the shuffle in order to see if we find the
19383 // source of the extract_vector_elt. If so, we can use it to modify the
19384 // shuffle rather than perform an insert_vector_elt.
19385 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
19386 ArgWorkList.emplace_back(Mask.size(), Y);
19387 ArgWorkList.emplace_back(0, X);
19389 while (!ArgWorkList.empty()) {
19392 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
19394 if (ArgVal == InsertVal0) {
19395 ElementOffset = ArgOffset;
19399 // Peek through concat_vector.
19400 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
19401 int CurrentArgOffset =
19402 ArgOffset + ArgVal.getValueType().getVectorNumElements();
19403 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
19404 for (SDValue Op : reverse(ArgVal->ops())) {
19405 CurrentArgOffset -= Step;
19406 ArgWorkList.emplace_back(CurrentArgOffset, Op);
19409 // Make sure we went through all the elements and did not screw up index
19411 assert(CurrentArgOffset == ArgOffset);
19415 // If we failed to find a match, see if we can replace an UNDEF shuffle
19417 if (ElementOffset == -1 && Y.isUndef() &&
19418 InsertVal0.getValueType() == Y.getValueType()) {
19419 ElementOffset = Mask.size();
19423 if (ElementOffset != -1) {
19424 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
19426 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
19427 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
19428 assert(NewMask[InsIndex] <
19429 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
19430 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
19432 SDValue LegalShuffle =
19433 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
19436 return LegalShuffle;
19440 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
19441 // bitcast(shuffle (bitcast V), (extended X), Mask)
19442 // Note: We do not use an insert_subvector node because that requires a
19443 // legal subvector type.
19444 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
19445 !InsertVal.getOperand(0).getValueType().isVector())
19448 SDValue SubVec = InsertVal.getOperand(0);
19449 SDValue DestVec = N->getOperand(0);
19450 EVT SubVecVT = SubVec.getValueType();
19451 EVT VT = DestVec.getValueType();
19452 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
19453 // If the source only has a single vector element, the cost of adding
19454 // it to a vector is likely to exceed the cost of an insert_vector_elt.
19455 if (NumSrcElts == 1)
19457 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
19458 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
19460 // Step 1: Create a shuffle mask that implements this insert operation. The
19461 // vector that we are inserting into will be operand 0 of the shuffle, so
19462 // those elements are just 'i'. The inserted subvector is in the first
19463 // positions of operand 1 of the shuffle. Example:
19464 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
19465 SmallVector<int, 16> Mask(NumMaskVals);
19466 for (unsigned i = 0; i != NumMaskVals; ++i) {
19467 if (i / NumSrcElts == InsIndex)
19468 Mask[i] = (i % NumSrcElts) + NumMaskVals;
19473 // Bail out if the target can not handle the shuffle we want to create.
19474 EVT SubVecEltVT = SubVecVT.getVectorElementType();
19475 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
19476 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
19479 // Step 2: Create a wide vector from the inserted source vector by appending
19480 // undefined elements. This is the same size as our destination vector.
19482 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
19483 ConcatOps[0] = SubVec;
19484 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
19486 // Step 3: Shuffle in the padded subvector.
19487 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
19488 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
19489 AddToWorklist(PaddedSubV.getNode());
19490 AddToWorklist(DestVecBC.getNode());
19491 AddToWorklist(Shuf.getNode());
19492 return DAG.getBitcast(VT, Shuf);
// visitINSERT_VECTOR_ELT - Combine an INSERT_VECTOR_ELT node.  Visible
// transforms: out-of-bounds insert -> undef; redundant re-insert of an
// extracted element -> original vector; variable insert into undef ->
// splat/build_vector; shuffle formation via combineInsertEltToShuffle;
// <1 x T> special cases; canonical ordering of constant-index insert
// chains; and collapsing an insert chain into a single BUILD_VECTOR
// (optionally filling known-zero elements with explicit zeros).
19495 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
19496 SDValue InVec = N->getOperand(0);
19497 SDValue InVal = N->getOperand(1);
19498 SDValue EltNo = N->getOperand(2);
19501 EVT VT = InVec.getValueType();
19502 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19504 // Insert into out-of-bounds element is undefined.
19505 if (IndexC && VT.isFixedLengthVector() &&
19506 IndexC->getZExtValue() >= VT.getVectorNumElements())
19507 return DAG.getUNDEF(VT);
19509 // Remove redundant insertions:
19510 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
19511 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19512 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
19516 // If this is variable insert to undef vector, it might be better to splat:
19517 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
19518 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
19519 if (VT.isScalableVector())
19520 return DAG.getSplatVector(VT, DL, InVal);
19522 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
19523 return DAG.getBuildVector(VT, DL, Ops);
// The remaining folds require a fixed-length vector and a constant index.
19528 if (VT.isScalableVector())
19531 unsigned NumElts = VT.getVectorNumElements();
19533 // We must know which element is being inserted for folds below here.
19534 unsigned Elt = IndexC->getZExtValue();
19536 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
19539 // Handle <1 x ???> vector insertion special cases.
19540 if (NumElts == 1) {
19541 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
19542 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19543 InVal.getOperand(0).getValueType() == VT &&
19544 isNullConstant(InVal.getOperand(1)))
19545 return InVal.getOperand(0);
19548 // Canonicalize insert_vector_elt dag nodes.
19550 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
19551 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
19553 // Do this only if the child insert_vector node has one use; also
19554 // do this only if indices are both constants and Idx1 < Idx0.
19555 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19556 && isa<ConstantSDNode>(InVec.getOperand(2))) {
19557 unsigned OtherElt = InVec.getConstantOperandVal(2);
19558 if (Elt < OtherElt) {
19560 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19561 InVec.getOperand(0), InVal, EltNo);
19562 AddToWorklist(NewOp.getNode());
19563 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19564 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19568 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
19569 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
19570 // vXi1 vector - we don't need to recurse.
19572 return DAG.getBuildVector(VT, DL, {InVal});
19574 // If we haven't already collected the element, insert into the op list.
// MaxEltVT tracks the widest integer element type seen, so all operands
// can be extended/truncated to one common type for the BUILD_VECTOR.
19575 EVT MaxEltVT = InVal.getValueType();
19576 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
19580 if (VT.isInteger()) {
19581 EVT EltVT = Elt.getValueType();
19582 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
19587 // Ensure all the operands are the same value type, fill any missing
19588 // operands with UNDEF and create the BUILD_VECTOR.
19589 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
19590 assert(Ops.size() == NumElts && "Unexpected vector size");
19591 for (SDValue &Op : Ops) {
19593 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
19595 Op = DAG.getUNDEF(MaxEltVT);
19597 return DAG.getBuildVector(VT, DL, Ops);
19600 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
19603 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
19604 for (SDValue CurVec = InVec; CurVec;) {
19605 // UNDEF - build new BUILD_VECTOR from already inserted operands.
19606 if (CurVec.isUndef())
19607 return CanonicalizeBuildVector(Ops);
19609 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
19610 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
19611 for (unsigned I = 0; I != NumElts; ++I)
19612 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
19613 return CanonicalizeBuildVector(Ops);
19616 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
19617 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
19618 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
19619 return CanonicalizeBuildVector(Ops);
19622 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
19623 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
19624 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
19625 if (CurIdx->getAPIntValue().ult(NumElts)) {
19626 unsigned Idx = CurIdx->getZExtValue();
19627 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
19629 // Found entire BUILD_VECTOR.
19630 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
19631 return CanonicalizeBuildVector(Ops);
19633 CurVec = CurVec->getOperand(0);
19637 // Failed to find a match in the chain - bail.
19641 // See if we can fill in the missing constant elements as zeros.
19642 // TODO: Should we do this for any constant?
19643 APInt DemandedZeroElts = APInt::getZero(NumElts);
19644 for (unsigned I = 0; I != NumElts; ++I)
19646 DemandedZeroElts.setBit(I);
19648 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
19649 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
19650 : DAG.getConstantFP(0, DL, MaxEltVT);
19651 for (unsigned I = 0; I != NumElts; ++I)
19655 return CanonicalizeBuildVector(Ops);
// scalarizeExtractedVectorLoad - Replace (extract_vector_elt (load V), Idx)
// with a narrow scalar load of just the extracted element, when the target
// allows the narrower access.  EVE is the extract node, InVecVT the loaded
// vector type, OriginalLoad the (simple) vector load being narrowed.
//
// Fix: ExtTy was inverted (NON_EXTLOAD when ResultVT is wider than the
// element).  The function's own logic below issues an *extending* load
// exactly when ResultVT.bitsGT(VecEltVT), so the ext type reported to
// TLI.shouldReduceLoadWidth must be EXTLOAD in that case and NON_EXTLOAD
// otherwise (matching upstream LLVM).
19662 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19664 LoadSDNode *OriginalLoad) {
19665 assert(OriginalLoad->isSimple());
19667 EVT ResultVT = EVE->getValueType(0);
19668 EVT VecEltVT = InVecVT.getVectorElementType();
19670 // If the vector element type is not a multiple of a byte then we are unable
19671 // to correctly compute an address to load only the extracted element as a
19673 if (!VecEltVT.isByteSized())
// A result wider than the element requires an extending scalar load.
19676 ISD::LoadExtType ExtTy =
19677 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
19678 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19679 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19682 Align Alignment = OriginalLoad->getAlign();
19683 MachinePointerInfo MPI;
19685 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
19686 int Elt = ConstEltNo->getZExtValue();
19687 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19688 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19689 Alignment = commonAlignment(Alignment, PtrOff);
19691 // Discard the pointer info except the address space because the memory
19692 // operand can't represent this new access since the offset is variable.
19693 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19694 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19697 bool IsFast = false;
19698 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19699 OriginalLoad->getAddressSpace(), Alignment,
19700 OriginalLoad->getMemOperand()->getFlags(),
19705 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19708 // We are replacing a vector load with a scalar load. The new load must have
19709 // identical memory op ordering to the original.
19711 if (ResultVT.bitsGT(VecEltVT)) {
19712 // If the result type of vextract is wider than the load, then issue an
19713 // extending load instead.
19714 ISD::LoadExtType ExtType =
19715 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
19717 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
19718 NewPtr, MPI, VecEltVT, Alignment,
19719 OriginalLoad->getMemOperand()->getFlags(),
19720 OriginalLoad->getAAInfo());
19721 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19723 // The result type is narrower or the same width as the vector element
19724 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
19725 Alignment, OriginalLoad->getMemOperand()->getFlags(),
19726 OriginalLoad->getAAInfo());
19727 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19728 if (ResultVT.bitsLT(VecEltVT))
19729 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
19731 Load = DAG.getBitcast(ResultVT, Load);
19737 /// Transform a vector binary operation into a scalar binary operation by moving
19738 /// the math/logic after an extract element of a vector.
// ExtElt is an EXTRACT_VECTOR_ELT node with a constant index; returns the
// scalarized binop, or a null SDValue if the pattern/target checks fail.
19739 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19740 bool LegalOperations) {
19741 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19742 SDValue Vec = ExtElt->getOperand(0);
19743 SDValue Index = ExtElt->getOperand(1);
19744 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
// Require: constant index, a single-use single-result binop source.
19745 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19746 Vec->getNumValues() != 1)
19749 // Targets may want to avoid this to prevent an expensive register transfer.
19750 if (!TLI.shouldScalarizeBinop(Vec))
19753 // Extracting an element of a vector constant is constant-folded, so this
19754 // transform is just replacing a vector op with a scalar op while moving the
19756 SDValue Op0 = Vec.getOperand(0);
19757 SDValue Op1 = Vec.getOperand(1);
// Only profitable when one operand is a (splat) constant: the constant
// side folds away, leaving a single scalar extract plus a scalar binop.
19759 if (isAnyConstantBuildVector(Op0, true) ||
19760 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
19761 isAnyConstantBuildVector(Op1, true) ||
19762 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
19763 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19764 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19766 EVT VT = ExtElt->getValueType(0);
19767 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19768 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19769 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19775 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19776 SDValue VecOp = N->getOperand(0);
19777 SDValue Index = N->getOperand(1);
19778 EVT ScalarVT = N->getValueType(0);
19779 EVT VecVT = VecOp.getValueType();
19780 if (VecOp.isUndef())
19781 return DAG.getUNDEF(ScalarVT);
19783 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19785 // This only really matters if the index is non-constant since other combines
19786 // on the constant elements already work.
19788 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19789 Index == VecOp.getOperand(2)) {
19790 SDValue Elt = VecOp.getOperand(1);
19791 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19794 // (vextract (scalar_to_vector val, 0) -> val
19795 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19796 // Only 0'th element of SCALAR_TO_VECTOR is defined.
19797 if (DAG.isKnownNeverZero(Index))
19798 return DAG.getUNDEF(ScalarVT);
19800 // Check if the result type doesn't match the inserted element type. A
19801 // SCALAR_TO_VECTOR may truncate the inserted element and the
19802 // EXTRACT_VECTOR_ELT may widen the extracted vector.
19803 SDValue InOp = VecOp.getOperand(0);
19804 if (InOp.getValueType() != ScalarVT) {
19805 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
19806 InOp.getValueType().bitsGT(ScalarVT));
19807 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
19812 // extract_vector_elt of out-of-bounds element -> UNDEF
19813 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19814 if (IndexC && VecVT.isFixedLengthVector() &&
19815 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19816 return DAG.getUNDEF(ScalarVT);
19818 // extract_vector_elt (build_vector x, y), 1 -> y
19819 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19820 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19821 TLI.isTypeLegal(VecVT) &&
19822 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19823 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19824 VecVT.isFixedLengthVector()) &&
19825 "BUILD_VECTOR used for scalable vectors");
19826 unsigned IndexVal =
19827 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19828 SDValue Elt = VecOp.getOperand(IndexVal);
19829 EVT InEltVT = Elt.getValueType();
19831 // Sometimes build_vector's scalar input types do not match result type.
19832 if (ScalarVT == InEltVT)
19835 // TODO: It may be useful to truncate if free if the build_vector implicitly
19839 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19842 if (VecVT.isScalableVector())
19845 // All the code from this point onwards assumes fixed width vectors, but it's
19846 // possible that some of the combinations could be made to work for scalable
19848 unsigned NumElts = VecVT.getVectorNumElements();
19849 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19851 // TODO: These transforms should not require the 'hasOneUse' restriction, but
19852 // there are regressions on multiple targets without it. We can end up with a
19853 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19854 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19855 VecOp.hasOneUse()) {
19856 // The vector index of the LSBs of the source depend on the endian-ness.
19857 bool IsLE = DAG.getDataLayout().isLittleEndian();
19858 unsigned ExtractIndex = IndexC->getZExtValue();
19859 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19860 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19861 SDValue BCSrc = VecOp.getOperand(0);
19862 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19863 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19865 if (LegalTypes && BCSrc.getValueType().isInteger() &&
19866 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19867 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19868 // trunc i64 X to i32
19869 SDValue X = BCSrc.getOperand(0);
19870 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19871 "Extract element and scalar to vector can't change element type "
19872 "from FP to integer.");
19873 unsigned XBitWidth = X.getValueSizeInBits();
19874 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19876 // An extract element return value type can be wider than its vector
19877 // operand element type. In that case, the high bits are undefined, so
19878 // it's possible that we may need to extend rather than truncate.
19879 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19880 assert(XBitWidth % VecEltBitWidth == 0 &&
19881 "Scalar bitwidth must be a multiple of vector element bitwidth");
19882 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19887 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19888 // We only perform this optimization before the op legalization phase because
19889 // we may introduce new vector instructions which are not backed by TD
19890 // patterns. For example on AVX, extracting elements from a wide vector
19891 // without using extract_subvector. However, if we can find an underlying
19892 // scalar value, then we can always use that.
19893 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19894 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19895 // Find the new index to extract from.
19896 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19898 // Extracting an undef index is undef.
19900 return DAG.getUNDEF(ScalarVT);
19902 // Select the right vector half to extract from.
19904 if (OrigElt < (int)NumElts) {
19905 SVInVec = VecOp.getOperand(0);
19907 SVInVec = VecOp.getOperand(1);
19908 OrigElt -= NumElts;
19911 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19912 SDValue InOp = SVInVec.getOperand(OrigElt);
19913 if (InOp.getValueType() != ScalarVT) {
19914 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19915 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19921 // FIXME: We should handle recursing on other vector shuffles and
19922 // scalar_to_vector here as well.
19924 if (!LegalOperations ||
19925 // FIXME: Should really be just isOperationLegalOrCustom.
19926 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19927 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19928 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19929 DAG.getVectorIdxConstant(OrigElt, DL));
19933 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19934 // simplify it based on the (valid) extraction indices.
19935 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19936 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19937 Use->getOperand(0) == VecOp &&
19938 isa<ConstantSDNode>(Use->getOperand(1));
19940 APInt DemandedElts = APInt::getZero(NumElts);
19941 for (SDNode *Use : VecOp->uses()) {
19942 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19943 if (CstElt->getAPIntValue().ult(NumElts))
19944 DemandedElts.setBit(CstElt->getZExtValue());
19946 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19947 // We simplified the vector operand of this extract element. If this
19948 // extract is not dead, visit it again so it is folded properly.
19949 if (N->getOpcode() != ISD::DELETED_NODE)
19951 return SDValue(N, 0);
19953 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19954 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19955 // We simplified the vector operand of this extract element. If this
19956 // extract is not dead, visit it again so it is folded properly.
19957 if (N->getOpcode() != ISD::DELETED_NODE)
19959 return SDValue(N, 0);
19963 // Everything under here is trying to match an extract of a loaded value.
19964 // If the result of load has to be truncated, then it's not necessarily
19966 bool BCNumEltsChanged = false;
19967 EVT ExtVT = VecVT.getVectorElementType();
19969 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19972 if (VecOp.getOpcode() == ISD::BITCAST) {
19973 // Don't duplicate a load with other uses.
19974 if (!VecOp.hasOneUse())
19977 EVT BCVT = VecOp.getOperand(0).getValueType();
19978 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19980 if (NumElts != BCVT.getVectorNumElements())
19981 BCNumEltsChanged = true;
19982 VecOp = VecOp.getOperand(0);
19983 ExtVT = BCVT.getVectorElementType();
19986 // extract (vector load $addr), i --> load $addr + i * size
19987 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19988 ISD::isNormalLoad(VecOp.getNode()) &&
19989 !Index->hasPredecessor(VecOp.getNode())) {
19990 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19991 if (VecLoad && VecLoad->isSimple())
19992 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19995 // Perform only after legalization to ensure build_vector / vector_shuffle
19996 // optimizations have already been done.
19997 if (!LegalOperations || !IndexC)
20000 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
20001 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
20002 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
20003 int Elt = IndexC->getZExtValue();
20004 LoadSDNode *LN0 = nullptr;
20005 if (ISD::isNormalLoad(VecOp.getNode())) {
20006 LN0 = cast<LoadSDNode>(VecOp);
20007 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20008 VecOp.getOperand(0).getValueType() == ExtVT &&
20009 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
20010 // Don't duplicate a load with other uses.
20011 if (!VecOp.hasOneUse())
20014 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
20016 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
20017 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
20019 // (load $addr+1*size)
20021 // Don't duplicate a load with other uses.
20022 if (!VecOp.hasOneUse())
20025 // If the bit convert changed the number of elements, it is unsafe
20026 // to examine the mask.
20027 if (BCNumEltsChanged)
20030 // Select the input vector, guarding against out of range extract vector.
20031 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
20032 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
20034 if (VecOp.getOpcode() == ISD::BITCAST) {
20035 // Don't duplicate a load with other uses.
20036 if (!VecOp.hasOneUse())
20039 VecOp = VecOp.getOperand(0);
20041 if (ISD::isNormalLoad(VecOp.getNode())) {
20042 LN0 = cast<LoadSDNode>(VecOp);
20043 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
20044 Index = DAG.getConstant(Elt, DL, Index.getValueType());
20046 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
20047 VecVT.getVectorElementType() == ScalarVT &&
20050 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
20051 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
20052 // -> extract_vector_elt a, 0
20053 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
20054 // -> extract_vector_elt a, 1
20055 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
20056 // -> extract_vector_elt b, 0
20057 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
20058 // -> extract_vector_elt b, 1
20060 EVT ConcatVT = VecOp.getOperand(0).getValueType();
20061 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
20062 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
20063 Index.getValueType());
20065 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
20066 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
20067 ConcatVT.getVectorElementType(),
20069 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
20072 // Make sure we found a non-volatile load and the extractelement is
20074 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
20077 // If Idx was -1 above, Elt is going to be -1, so just return undef.
20079 return DAG.getUNDEF(LVT);
20081 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every defined operand of this BUILD_VECTOR is an ISD::ANY_EXTEND or
// ISD::ZERO_EXTEND from one common narrower scalar type, rebuild the vector
// out of the narrow source scalars (filling the extra narrow lanes of each
// group with undef, or with zero when any zero_extend is present) and
// bitcast the narrow build_vector back to the original type.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
  unsigned NumInScalars = N->getNumOperands();
  EVT VT = N->getValueType(0);
  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // bits of the narrow lanes this way.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;
    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();
    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());
  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  // If we already have a splat buildvector, then don't fold it if it means
  // introducing zeros.
  if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // When only any_extends feed the vector, the unused narrow lanes may be
  // undef; otherwise they must be zero to preserve zero_extend semantics.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);
  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
      In = Cast->getOperand(0);
    // Place the narrow scalar in the low-order lane of its group on
    // little-endian targets, or the high-order lane on big-endian targets.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));
    assert(Index < Ops.size() && "Invalid index");
  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
// Simplify (build_vec (trunc $1)
//          (trunc (srl $1 half-width))
//          (trunc (srl $1 (2 * half-width))) …)
// to a single bitcast of the wide source value, when every element is a
// truncate of the same source shifted right by i * element-width bits.
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
  // Only for little endian
  if (!DAG.getDataLayout().isLittleEndian())
  EVT VT = N->getValueType(0);
  EVT OutScalarTy = VT.getScalarType();
  uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
  // Only for power of two types to be sure that bitcast works well
  if (!isPowerOf2_64(ScalarTypeBitsize))
  unsigned NumInScalars = N->getNumOperands();
  // Look through bitcasts
  auto PeekThroughBitcast = [](SDValue Op) {
    if (Op.getOpcode() == ISD::BITCAST)
      return Op.getOperand(0);
  // The source value where all the parts are extracted.
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = PeekThroughBitcast(N->getOperand(i));
    // Ignore undef inputs.
    if (In.isUndef()) continue;
    // Every defined element must be a truncate ...
    if (In.getOpcode() != ISD::TRUNCATE)
    In = PeekThroughBitcast(In.getOperand(0));
    if (In.getOpcode() != ISD::SRL) {
      // For now only build_vec without shuffling, handle shifts here in the
      // ... of an SRL of the one common source value.
      SDValue part = PeekThroughBitcast(In.getOperand(0));
    } else if (Src != part) {
      // Vector parts do not stem from the same variable
    SDValue ShiftAmtVal = In.getOperand(1);
    if (!isa<ConstantSDNode>(ShiftAmtVal))
    uint64_t ShiftAmt = In.getConstantOperandVal(1);
    // The extracted value is not extracted at the right position
    if (ShiftAmt != i * ScalarTypeBitsize)
  // Only cast if the size is the same
  if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
  return DAG.getBitcast(VT, Src);
// Build one shuffle for a "pair slot" of a BUILD_VECTOR-of-extracts.
// VectorMask maps each output element to the number of the input vector it
// is extracted from; elements numbered LeftIdx come from VecIn1, and those
// numbered LeftIdx + 1 come from VecIn2. DidSplitVec indicates VecIn1/VecIn2
// were produced by splitting one wider source vector, in which case extract
// indices are already relative to VecIn1. Returns the shuffle (possibly
// wrapped in an extract_subvector), or SDValue() when the input and output
// vector types cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;
  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVT1Size = InVT1.getFixedSizeInBits();
  uint64_t InVT2Size = InVT2.getFixedSizeInBits();
  assert(InVT2Size <= InVT1Size &&
         "Inputs must be sorted to be in non-increasing vector size order.");
  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VTSize / InVT1Size;
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1Size == VTSize * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getVectorIdxConstant(NumElems, DL));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
        assert(InVT2Size <= InVT1Size &&
               "Second input is not going to be larger than the first one.");
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        // Shuffle at the wider width; the result is extracted back to VT
        // below once the shuffle has been built.
        ShuffleNumElems = NumElems * 2;
    } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      // TODO: Support cases where the length mismatch isn't exactly by a
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);
  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] <= 0)
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();
  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
// If a build vector is all-undef except for a single element that is a
// zero-extended extract_vector_elt at a constant index, turn the whole node
// into a shuffle of the extract's source vector against a zero vector,
// bitcast to the build vector's type. Returns SDValue() when the pattern or
// the sizes do not match.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
  // First, determine where the build vector is not undef.
  // TODO: We could extend this to handle zero elements as well as undefs.
  int NumBVOps = BV->getNumOperands();
  for (int i = 0; i != NumBVOps; ++i) {
    SDValue Op = BV->getOperand(i);
  // Bail out if there's no non-undef element.
  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
  // Also, require that the build vector does not implicitly truncate/extend
  // its elements.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
  EVT VT = BV->getValueType(0);
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
  // The zero-extend must be a multiple of the source size, and we must be
  // building a vector of the same size as the source of the extract element.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
  if (DestSize % SrcSize != 0 ||
      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
  // Create a shuffle mask that will combine the extracted element with zeros
  // in the remaining narrow lanes of its group.
  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio == ZextElt) {
      // The low bits of the (potentially translated) extracted element map to
      // the source vector. The high bits map to zero. We will use a zero vector
      // as the 2nd source operand of the shuffle, so use the 1st element of
      // that vector (mask value is number-of-elements) for the high bits.
      if (i % ZextRatio == 0)
        ShufMask[i] = Extract.getConstantOperandVal(1);
        ShufMask[i] = NumMaskElts;
  // Undef elements of the build vector remain undef because we initialize
  // the shuffle mask with -1.
  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
  // bitcast (shuffle V, ZeroVec, VectorMask)
  EVT VecVT = Extract.getOperand(0).getValueType();
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
                                             ZeroVec, ShufMask, DAG);
  return DAG.getBitcast(VT, Shuf);
// FIXME: promote to STLExtras.
// Return the index of the first occurrence of \p Val in \p Range, or -1 if
// \p Val is not present. The result uses the range's difference type.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  auto It = std::find(std::begin(Range), std::end(Range), Val);
  using DiffT = decltype(std::distance(std::begin(Range), It));
  if (It == std::end(Range))
    return static_cast<DiffT>(-1);
  return std::distance(std::begin(Range), It);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
//
// The result is built as a binary tree of shuffles: source vectors are
// paired and shuffled via createBuildVecShuffle, then the partial shuffles
// are blended pairwise until one root value remains. Returns that root, or
// SDValue() when the combine does not apply.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  EVT VT = N->getValueType(0);
  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();
  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.
  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  VecIn.push_back(SDValue());
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);
    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
    SDValue ExtractedFromVec = Op.getOperand(0);
    // Scalable source vectors have no fixed element count to index into.
    if (ExtractedFromVec.getValueType().isScalableVector())
    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
    if (Idx == -1) { // A new source vector?
      Idx = VecIn.size();
      VecIn.push_back(ExtractedFromVec);
    VectorMask[i] = Idx;
  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT) &&
          SplitSize + SplitVT.getVectorNumElements() <=
              InVT.getVectorNumElements()) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(SplitSize, DL))
                             ;
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(0, DL));
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;
        // Re-point each used element at the half of the split it reads from.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
  // Sort input vectors by decreasing vector element count,
  // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit" zero vector as-is.
  SmallVector<SDValue, 8> SortedVecIn(VecIn);
  llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
                    [](const SDValue &a, const SDValue &b) {
                      return a.getValueType().getVectorNumElements() >
                             b.getValueType().getVectorNumElements();
  // We now also need to rebuild the VectorMask, because it referenced element
  // order in VecIn, and we just sorted them.
  for (int &SourceVectorIndex : VectorMask) {
    if (SourceVectorIndex <= 0)
    unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
    assert(Idx > 0 && Idx < SortedVecIn.size() &&
           VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
    SourceVectorIndex = Idx;
  VecIn = std::move(SortedVecIn);
  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));
  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];
  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
      Vec = Shuffles.size() - 1;
      Vec = (Vec - 1) / 2;
  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      SDValue L = Shuffles[Left];
      ArrayRef<int> LMask;
      // Look through a single-input shuffle so its mask can be composed
      // directly into the blend instead of chaining two shuffles.
      bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
                           L.use_empty() && L.getOperand(1).isUndef() &&
                           L.getOperand(0).getValueType() == L.getValueType();
      if (IsLeftShuffle) {
        LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
        L = L.getOperand(0);
      SDValue R = Shuffles[Right];
      ArrayRef<int> RMask;
      bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
                            R.use_empty() && R.getOperand(1).isUndef() &&
                            R.getOperand(0).getValueType() == R.getValueType();
      if (IsRightShuffle) {
        RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
        R = R.getOperand(0);
      for (unsigned I = 0; I != NumElems; ++I) {
        if (VectorMask[I] == Left) {
            Mask[I] = LMask[I];
          VectorMask[I] = In;
        } else if (VectorMask[I] == Right) {
          Mask[I] = I + NumElems;
          if (IsRightShuffle)
            Mask[I] = RMask[I] + NumElems;
          VectorMask[I] = In;
      Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
  return Shuffles[0];
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
  EVT VT = N->getValueType(0);
  bool FoundZeroExtend = false;
  SDValue Op0 = N->getOperand(0);
  // checkElem returns the constant extract index when Op is a
  // (zext|aext (extract_vector_elt X, C)) whose source vector X matches the
  // first operand's source. It also records (via FoundZeroExtend) whether any
  // zero_extend was seen, so the final node can use ZERO_EXTEND when needed.
  auto checkElem = [&](SDValue Op) -> int64_t {
    unsigned Opc = Op.getOpcode();
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  // Offset must be a constant multiple of the
  // known-minimum vector length of the result type.
  int64_t Offset = checkElem(Op0);
  if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
  // Grab the run of source elements starting at the first extract index,
  // then extend the narrow subvector to the result type in one node.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
// Main BUILD_VECTOR combine driver: tries a sequence of folds (undef-fold,
// splat-of-bitcast -> concat, identity/subvector extract, then the
// reduceBuildVec*/convertBuildVec* helpers, finally SPLAT_VECTOR).
// NOTE(review): this extract of the file is missing several short lines
// (early returns, `return V;` after the helper calls, closing braces).
20797 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20798 EVT VT = N->getValueType(0);
20800 // A vector built entirely of undefs is undef.
20801 if (ISD::allOperandsUndef(N))
20802 return DAG.getUNDEF(VT);
20804 // If this is a splat of a bitcast from another vector, change to a
20807 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20808 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20810 // If X is a build_vector itself, the concat can become a larger build_vector.
20811 // TODO: Maybe this is useful for non-splat too?
20812 if (!LegalOperations) {
20813 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20814 Splat = peekThroughBitcasts(Splat);
20815 EVT SrcVT = Splat.getValueType();
20816 if (SrcVT.isVector()) {
20817 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20818 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20819 SrcVT.getVectorElementType(), NumElts);
// Only form the wider concat type if it is (or can become) legal.
20820 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20821 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20822 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20824 return DAG.getBitcast(VT, Concat);
20830 // Check if we can express BUILD VECTOR via subvector extract.
20831 if (!LegalTypes && (N->getNumOperands() > 1)) {
20832 SDValue Op0 = N->getOperand(0);
// Returns the constant lane index when Op is (extract_vector_elt V, C) with
// the same source vector V as the first operand; assumed to yield a
// non-matching value otherwise (full lambda body not visible here).
20833 auto checkElem = [&](SDValue Op) -> uint64_t {
20834 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20835 (Op0.getOperand(0) == Op.getOperand(0)))
20836 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20837 return CNode->getZExtValue();
// All elements must be consecutive lanes starting at Offset.
20841 int Offset = checkElem(Op0);
20842 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20843 if (Offset + i != checkElem(N->getOperand(i))) {
// Identity case: same type, index 0 — the build_vector IS the source.
20849 if ((Offset == 0) &&
20850 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20851 return Op0.getOperand(0);
20852 if ((Offset != -1) &&
20853 ((Offset % N->getValueType(0).getVectorNumElements()) ==
20854 0)) // IDX must be multiple of output size.
20855 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20856 Op0.getOperand(0), Op0.getOperand(1));
// Delegate to the specialized build_vector reductions.
20859 if (SDValue V = convertBuildVecZextToZext(N))
20862 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20865 if (SDValue V = reduceBuildVecTruncToBitCast(N))
20868 if (SDValue V = reduceBuildVecToShuffle(N))
20871 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20872 // Do this late as some of the above may replace the splat.
20873 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20874 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20875 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20876 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
// Fold CONCAT_VECTORS whose operands are all scalar-to-vector bitcasts (or
// undef) into a single BUILD_VECTOR of the scalars, bitcast to the result
// type. Picks an integer or FP scalar type of matching width for all lanes.
// NOTE(review): this extract of the file is missing several short lines
// (early returns, `AnyFP`/`AnyInteger` assignments, closing braces).
20882 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20883 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20884 EVT OpVT = N->getOperand(0).getValueType();
20886 // If the operands are legal vectors, leave them alone.
20887 if (TLI.isTypeLegal(OpVT))
20891 EVT VT = N->getValueType(0);
20892 SmallVector<SDValue, 8> Ops;
// Default scalar carrier type: an integer as wide as one concat operand.
20894 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20895 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20897 // Keep track of what we encounter.
20898 bool AnyInteger = false;
20899 bool AnyFP = false;
20900 for (const SDValue &Op : N->ops()) {
// Collect the pre-bitcast scalar for each operand; undef operands become
// scalar undefs of the carrier type.
20901 if (ISD::BITCAST == Op.getOpcode() &&
20902 !Op.getOperand(0).getValueType().isVector())
20903 Ops.push_back(Op.getOperand(0));
20904 else if (ISD::UNDEF == Op.getOpcode())
20905 Ops.push_back(ScalarUndef);
20909 // Note whether we encounter an integer or floating point scalar.
20910 // If it's neither, bail out, it could be something weird like x86mmx.
20911 EVT LastOpVT = Ops.back().getValueType();
20912 if (LastOpVT.isFloatingPoint())
20914 else if (LastOpVT.isInteger())
20920 // If any of the operands is a floating point scalar bitcast to a vector,
20921 // use floating point types throughout, and bitcast everything.
20922 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20924 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20925 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
// Normalize every collected scalar to the chosen carrier type.
20927 for (SDValue &Op : Ops) {
20928 if (Op.getValueType() == SVT)
20933 Op = DAG.getBitcast(SVT, Op);
// Build the scalar vector and bitcast it back to the concat's result type.
20938 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20939 VT.getSizeInBits() / SVT.getSizeInBits());
20940 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20943 // Attempt to merge nested concat_vectors/undefs.
20944 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20945 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
// All non-undef operands must be CONCAT_VECTORS with the same (legal)
// subvector operand type; undef operands are expanded to the matching
// number of subvector-sized undefs.
// NOTE(review): this extract of the file is missing several short lines
// (undef-skip `continue`s, early returns, `FirstConcat = Op;` and braces).
20946 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20947 SelectionDAG &DAG) {
20948 EVT VT = N->getValueType(0);
20950 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20952 SDValue FirstConcat;
20953 for (const SDValue &Op : N->ops()) {
20956 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20958 if (!FirstConcat) {
// Remember the inner-operand type from the first concat seen and require
// it to be legal before merging.
20959 SubVT = Op.getOperand(0).getValueType();
20960 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
// Every subsequent concat must use the same subvector type.
20965 if (SubVT != Op.getOperand(0).getValueType())
20968 assert(FirstConcat && "Concat of all-undefs found");
// Second pass: flatten all inner operands into one operand list.
20970 SmallVector<SDValue> ConcatOps;
20971 for (const SDValue &Op : N->ops()) {
20972 if (Op.isUndef()) {
20973 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20976 ConcatOps.append(Op->op_begin(), Op->op_end());
20978 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20981 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20982 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20983 // most two distinct vectors the same size as the result, attempt to turn this
20984 // into a legal shuffle.
// NOTE(review): this extract of the file is missing several short lines
// (`continue`s after the undef handling, early returns, SV0/SV1 assignments
// in the two-input tracking, closing braces).
20985 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20986 EVT VT = N->getValueType(0);
20987 EVT OpVT = N->getOperand(0).getValueType();
20989 // We currently can't generate an appropriate shuffle for a scalable vector.
20990 if (VT.isScalableVector())
20993 int NumElts = VT.getVectorNumElements();
20994 int NumOpElts = OpVT.getVectorNumElements();
// SV0/SV1 become the (at most two) distinct source vectors of the shuffle.
20996 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20997 SmallVector<int, 8> Mask;
20999 for (SDValue Op : N->ops()) {
21000 Op = peekThroughBitcasts(Op);
21002 // UNDEF nodes convert to UNDEF shuffle mask values.
21003 if (Op.isUndef()) {
21004 Mask.append((unsigned)NumOpElts, -1);
21008 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21011 // What vector are we extracting the subvector from and at what index?
21012 SDValue ExtVec = Op.getOperand(0);
21013 int ExtIdx = Op.getConstantOperandVal(1);
21015 // We want the EVT of the original extraction to correctly scale the
21016 // extraction index.
21017 EVT ExtVT = ExtVec.getValueType();
21018 ExtVec = peekThroughBitcasts(ExtVec);
21020 // UNDEF nodes convert to UNDEF shuffle mask values.
21021 if (ExtVec.isUndef()) {
21022 Mask.append((unsigned)NumOpElts, -1);
21026 // Ensure that we are extracting a subvector from a vector the same
21027 // size as the result.
21028 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
21031 // Scale the subvector index to account for any bitcast.
21032 int NumExtElts = ExtVT.getVectorNumElements();
21033 if (0 == (NumExtElts % NumElts))
21034 ExtIdx /= (NumExtElts / NumElts);
21035 else if (0 == (NumElts % NumExtElts))
21036 ExtIdx *= (NumElts / NumExtElts);
21040 // At most we can reference 2 inputs in the final shuffle.
21041 if (SV0.isUndef() || SV0 == ExtVec) {
// First source: mask indices address lanes [0, NumElts).
21043 for (int i = 0; i != NumOpElts; ++i)
21044 Mask.push_back(i + ExtIdx);
21045 } else if (SV1.isUndef() || SV1 == ExtVec) {
// Second source: mask indices are offset by NumElts.
21047 for (int i = 0; i != NumOpElts; ++i)
21048 Mask.push_back(i + ExtIdx + NumElts);
// Let the target build a legal shuffle (or fail) from the gathered mask.
21054 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21055 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
21056 DAG.getBitcast(VT, SV1), Mask, DAG);
// Fold concat_vectors of identical int<->fp casts into one wider cast:
//   concat (cast X), (cast Y), ... -> cast (concat X, Y, ...)
// provided the target supports the cast at the concatenated width.
// NOTE(review): this extract of the file is missing several short lines
// (the `break`/`default: return SDValue();` arms of both switches, early
// returns, and closing braces).
21059 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
21060 unsigned CastOpcode = N->getOperand(0).getOpcode();
21061 switch (CastOpcode) {
21062 case ISD::SINT_TO_FP:
21063 case ISD::UINT_TO_FP:
21064 case ISD::FP_TO_SINT:
21065 case ISD::FP_TO_UINT:
21066 // TODO: Allow more opcodes?
21067 // case ISD::BITCAST:
21068 // case ISD::TRUNCATE:
21069 // case ISD::ZERO_EXTEND:
21070 // case ISD::SIGN_EXTEND:
21071 // case ISD::FP_EXTEND:
21077 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
21078 if (!SrcVT.isVector())
21081 // All operands of the concat must be the same kind of cast from the same
21083 SmallVector<SDValue, 4> SrcOps;
21084 for (SDValue Op : N->ops()) {
// Each cast must be single-use so the fold does not duplicate work.
21085 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
21086 Op.getOperand(0).getValueType() != SrcVT)
21088 SrcOps.push_back(Op.getOperand(0));
21091 // The wider cast must be supported by the target. This is unusual because
21092 // the operation support type parameter depends on the opcode. In addition,
21093 // check the other type in the cast to make sure this is really legal.
21094 EVT VT = N->getValueType(0);
21095 EVT SrcEltVT = SrcVT.getVectorElementType();
21096 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
21097 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
21098 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21099 switch (CastOpcode) {
21100 case ISD::SINT_TO_FP:
21101 case ISD::UINT_TO_FP:
// int->fp: legality is keyed on the (wider) integer source type.
21102 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
21103 !TLI.isTypeLegal(VT))
21106 case ISD::FP_TO_SINT:
21107 case ISD::FP_TO_UINT:
// fp->int: legality is keyed on the (wider) integer result type.
21108 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
21109 !TLI.isTypeLegal(ConcatSrcVT))
21113 llvm_unreachable("Unexpected cast opcode");
21116 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
21118 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
21119 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
// Main CONCAT_VECTORS combine driver. Tries, in order: single-operand
// identity, all-undef, one-live-operand folds (larger concat / scalar ->
// scalar_to_vector), merging of BUILD_VECTOR operands, then the
// combineConcatVectorOf* helpers, and finally the identity-extract scan.
// NOTE(review): this extract of the file is missing several short lines
// (early returns, `return V;` after the helper calls, `continue`s in the
// identity-extract loop, closing braces).
21122 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
21123 // If we only have one input vector, we don't need to do any concatenation.
21124 if (N->getNumOperands() == 1)
21125 return N->getOperand(0);
21127 // Check if all of the operands are undefs.
21128 EVT VT = N->getValueType(0);
21129 if (ISD::allOperandsUndef(N))
21130 return DAG.getUNDEF(VT);
21132 // Optimize concat_vectors where all but the first of the vectors are undef.
21133 if (all_of(drop_begin(N->ops()),
21134 [](const SDValue &Op) { return Op.isUndef(); })) {
21135 SDValue In = N->getOperand(0);
21136 assert(In.getValueType().isVector() && "Must concat vectors");
21138 // If the input is a concat_vectors, just make a larger concat by padding
21139 // with smaller undefs.
21140 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
21141 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
21142 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
21143 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
21144 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21147 SDValue Scalar = peekThroughOneUseBitcasts(In);
21149 // concat_vectors(scalar_to_vector(scalar), undef) ->
21150 // scalar_to_vector(scalar)
21151 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
21152 Scalar.hasOneUse()) {
21153 EVT SVT = Scalar.getValueType().getVectorElementType();
21154 if (SVT == Scalar.getOperand(0).getValueType())
21155 Scalar = Scalar.getOperand(0);
21158 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
21159 if (!Scalar.getValueType().isVector()) {
21160 // If the bitcast type isn't legal, it might be a trunc of a legal type;
21161 // look through the trunc so we can still do the transform:
21162 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
21163 if (Scalar->getOpcode() == ISD::TRUNCATE &&
21164 !TLI.isTypeLegal(Scalar.getValueType()) &&
21165 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
21166 Scalar = Scalar->getOperand(0);
21168 EVT SclTy = Scalar.getValueType();
21170 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
21173 // Bail out if the vector size is not a multiple of the scalar size.
21174 if (VT.getSizeInBits() % SclTy.getSizeInBits())
21177 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
21178 if (VNTNumElms < 2)
21181 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
21182 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
// Rebuild as scalar_to_vector in the equivalent element type, then bitcast
// to the requested result type.
21185 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
21186 return DAG.getBitcast(VT, Res);
21190 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
21191 // We have already tested above for an UNDEF only concatenation.
21192 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
21193 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
21194 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
21195 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
21197 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
21198 SmallVector<SDValue, 8> Opnds;
21199 EVT SVT = VT.getScalarType();
21202 if (!SVT.isFloatingPoint()) {
21203 // If BUILD_VECTOR are from built from integer, they may have different
21204 // operand types. Get the smallest type and truncate all operands to it.
21205 bool FoundMinVT = false;
21206 for (const SDValue &Op : N->ops())
21207 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
21208 EVT OpSVT = Op.getOperand(0).getValueType();
21209 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
21212 assert(FoundMinVT && "Concat vector type mismatch");
21215 for (const SDValue &Op : N->ops()) {
21216 EVT OpVT = Op.getValueType();
21217 unsigned NumElts = OpVT.getVectorNumElements();
21219 if (ISD::UNDEF == Op.getOpcode())
21220 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
21222 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
21223 if (SVT.isFloatingPoint()) {
21224 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
21225 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
// Integer case: truncate each element to the common minimum type.
21227 for (unsigned i = 0; i != NumElts; ++i)
21229 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
21234 assert(VT.getVectorNumElements() == Opnds.size() &&
21235 "Concat vector type mismatch");
21236 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
21239 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
21240 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
21241 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
21244 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
21245 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
21246 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
21249 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
21250 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
21254 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
21257 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
21258 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
21259 // operands and look for a CONCAT operations that place the incoming vectors
21260 // at the exact same location.
21262 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
21263 SDValue SingleSource = SDValue();
21264 unsigned PartNumElem =
21265 N->getOperand(0).getValueType().getVectorMinNumElements();
21267 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
21268 SDValue Op = N->getOperand(i);
21273 // Check if this is the identity extract:
21274 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21277 // Find the single incoming vector for the extract_subvector.
21278 if (SingleSource.getNode()) {
// All extracts must read from the same source vector.
21279 if (Op.getOperand(0) != SingleSource)
21282 SingleSource = Op.getOperand(0);
21284 // Check the source type is the same as the type of the result.
21285 // If not, this concat may extend the vector, so we can not
21286 // optimize it away.
21287 if (SingleSource.getValueType() != N->getValueType(0))
21291 // Check that we are reading from the identity index.
21292 unsigned IdentityIndex = i * PartNumElem;
21293 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
21297 if (SingleSource.getNode())
21298 return SingleSource;
21303 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
21304 // if the subvector can be sourced for free.
// Returns the SubVT-typed value at `Index` within V when V is
// (insert_subvector ?, X, Index) or (concat_vectors X0, X1, ...);
// the fall-through "return SDValue()" is not visible in this extract.
21305 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
21306 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
21307 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
21308 return V.getOperand(1);
21310 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21311 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
21312 V.getOperand(0).getValueType() == SubVT &&
21313 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
// Map the element index onto the matching concat operand.
21314 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
21315 return V.getOperand(SubIdx);
// Fold ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index
//   --> binop X, Y
// i.e. when both operands of a wide binop were only inserted to be
// extracted right back out, perform the binop directly at the narrow width.
// NOTE(review): the early `return SDValue();` lines after the guards below
// are missing from this extract of the file.
21320 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
21322 bool LegalOperations) {
21323 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21324 SDValue BinOp = Extract->getOperand(0);
21325 unsigned BinOpcode = BinOp.getOpcode();
// Only single-result binary ops are handled.
21326 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
21329 EVT VecVT = BinOp.getValueType();
21330 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
21331 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
21334 SDValue Index = Extract->getOperand(1);
21335 EVT SubVT = Extract->getValueType(0);
// The narrow binop must be supported by the target.
21336 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
21339 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
21340 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
21342 // TODO: We could handle the case where only 1 operand is being inserted by
21343 // creating an extract of the other operand, but that requires checking
21344 // number of uses and/or costs.
21345 if (!Sub0 || !Sub1)
21348 // We are inserting both operands of the wide binop only to extract back
21349 // to the narrow vector size. Eliminate all of the insert/extract:
21350 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
21351 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
21352 BinOp->getFlags());
21355 /// If we are extracting a subvector produced by a wide binary operator try
21356 /// to use a narrow binary operator and/or avoid concatenation and extraction.
// NOTE(review): this extract of the file is missing several short lines
// (early `return SDValue();`s after the guards, closing braces, and the
// `return SDValue();` lambda fall-through in GetSubVector).
21357 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
21358 bool LegalOperations) {
21359 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
21360 // some of these bailouts with other transforms.
21362 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
21365 // The extract index must be a constant, so we can map it to a concat operand.
21366 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
21367 if (!ExtractIndexC)
21370 // We are looking for an optionally bitcasted wide vector binary operator
21371 // feeding an extract subvector.
21372 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21373 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
21374 unsigned BOpcode = BinOp.getOpcode();
21375 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
21378 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
21379 // reduced to the unary fneg when it is visited, and we probably want to deal
21380 // with fneg in a target-specific way.
21381 if (BOpcode == ISD::FSUB) {
21382 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
21383 if (C && C->getValueAPF().isNegZero())
21387 // The binop must be a vector type, so we can extract some fraction of it.
21388 EVT WideBVT = BinOp.getValueType();
21389 // The optimisations below currently assume we are dealing with fixed length
21390 // vectors. It is possible to add support for scalable vectors, but at the
21391 // moment we've done no analysis to prove whether they are profitable or not.
21392 if (!WideBVT.isFixedLengthVector())
21395 EVT VT = Extract->getValueType(0);
21396 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
21397 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
21398 "Extract index is not a multiple of the vector length.");
21400 // Bail out if this is not a proper multiple width extraction.
21401 unsigned WideWidth = WideBVT.getSizeInBits();
21402 unsigned NarrowWidth = VT.getSizeInBits();
21403 if (WideWidth % NarrowWidth != 0)
21406 // Bail out if we are extracting a fraction of a single operation. This can
21407 // occur because we potentially looked through a bitcast of the binop.
21408 unsigned NarrowingRatio = WideWidth / NarrowWidth;
21409 unsigned WideNumElts = WideBVT.getVectorNumElements();
21410 if (WideNumElts % NarrowingRatio != 0)
21413 // Bail out if the target does not support a narrower version of the binop.
21414 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
21415 WideNumElts / NarrowingRatio);
21416 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
21419 // If extraction is cheap, we don't need to look at the binop operands
21420 // for concat ops. The narrow binop alone makes this transform profitable.
21421 // We can't just reuse the original extract index operand because we may have
// ExtBOIdx is the extract index re-expressed in NarrowBVT elements (the
// bitcast peek may have changed the element size).
21423 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
21424 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
21425 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
21426 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
21427 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
21429 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21430 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21431 BinOp.getOperand(0), NewExtIndex);
21432 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21433 BinOp.getOperand(1), NewExtIndex);
21434 SDValue NarrowBinOp =
21435 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
21436 return DAG.getBitcast(VT, NarrowBinOp);
21439 // Only handle the case where we are doubling and then halving. A larger ratio
21440 // may require more than two narrow binops to replace the wide binop.
21441 if (NarrowingRatio != 2)
21444 // TODO: The motivating case for this transform is an x86 AVX1 target. That
21445 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
21446 // flavors, but no other 256-bit integer support. This could be extended to
21447 // handle any binop, but that may require fixing/adding other folds to avoid
21448 // codegen regressions.
21449 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
21452 // We need at least one concatenation operation of a binop operand to make
21453 // this transform worthwhile. The concat must double the input vector sizes.
21454 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
21455 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
21456 return V.getOperand(ConcatOpNum);
21459 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
21460 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
21462 if (SubVecL || SubVecR) {
21463 // If a binop operand was not the result of a concat, we must extract a
21464 // half-sized operand for our new narrow binop:
21465 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
21466 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
21467 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
21469 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21470 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
21471 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21472 BinOp.getOperand(0), IndexC);
21474 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
21475 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21476 BinOp.getOperand(1), IndexC);
21478 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
21479 return DAG.getBitcast(VT, NarrowBinOp);
21485 /// If we are extracting a subvector from a wide vector load, convert to a
21486 /// narrow load to eliminate the extraction:
21487 /// (extract_subvector (load wide vector)) --> (load narrow vector)
// NOTE(review): this extract of the file is missing several short lines
// (early returns after the guards, the fixed-offset MachineMemOperand
// arguments, and the final `return NewLd;`).
21488 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
21489 // TODO: Add support for big-endian. The offset calculation must be adjusted.
21490 if (DAG.getDataLayout().isBigEndian())
// Only simple (non-extending, non-atomic/volatile) loads are narrowed.
21493 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
21494 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
21497 // Allow targets to opt-out.
21498 EVT VT = Extract->getValueType(0);
21500 // We can only create byte sized loads.
21501 if (!VT.isByteSized())
21504 unsigned Index = Extract->getConstantOperandVal(1);
21505 unsigned NumElts = VT.getVectorMinNumElements();
21507 // The definition of EXTRACT_SUBVECTOR states that the index must be a
21508 // multiple of the minimum number of elements in the result type.
21509 assert(Index % NumElts == 0 && "The extract subvector index is not a "
21510 "multiple of the result's element count");
21512 // It's fine to use TypeSize here as we know the offset will not be negative.
21513 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
21515 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21516 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
21519 // The narrow load will be offset from the base address of the old load if
21520 // we are extracting from something besides index 0 (little-endian).
21523 // TODO: Use "BaseIndexOffset" to make this more effective.
21524 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
21526 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
21527 MachineFunction &MF = DAG.getMachineFunction();
21528 MachineMemOperand *MMO;
// Scalable offsets cannot be folded into the pointer info, so fall back to
// an address-space-only MachinePointerInfo for the new memory operand.
21529 if (Offset.isScalable()) {
21530 MachinePointerInfo MPI =
21531 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
21532 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
21534 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
// Preserve ordering against the original load's chain users.
21537 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
21538 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
21542 /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
21543 /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
21544 ///                              EXTRACT_SUBVECTOR(Op?, ?),
21546 /// iff it is legal and profitable to do so. Notably, the trimmed mask
21547 /// (containing only the elements that are extracted)
21548 /// must reference at most two subvectors.
// NOTE(review): a few short lines (early returns / `continue`s) are missing
// from this extract of the file.
21549 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
21551 const TargetLowering &TLI,
21552 bool LegalOperations) {
21553 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21554 "Must only be called on EXTRACT_SUBVECTOR's");
21556 SDValue N0 = N->getOperand(0);
21558 // Only deal with non-scalable vectors.
21559 EVT NarrowVT = N->getValueType(0);
21560 EVT WideVT = N0.getValueType();
21561 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
21564 // The operand must be a shufflevector.
21565 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
21566 if (!WideShuffleVector)
21569 // The old shuffle needs to go away.
21570 if (!WideShuffleVector->hasOneUse())
21573 // And the narrow shufflevector that we'll form must be legal.
21574 if (LegalOperations &&
21575 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
21578 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
21579 int NumEltsExtracted = NarrowVT.getVectorNumElements();
21580 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
21581 "Extract index is not a multiple of the output vector length.");
21583 int WideNumElts = WideVT.getVectorNumElements();
21585 SmallVector<int, 16> NewMask;
21586 NewMask.reserve(NumEltsExtracted);
// Ordered set of (shuffle operand, subvector index) pairs the trimmed mask
// actually reads from; its insertion order defines the new operand order.
21587 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
21588 DemandedSubvectors;
21590 // Try to decode the wide mask into narrow mask from at most two subvectors.
21591 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
21592 NumEltsExtracted)) {
21593 assert((M >= -1) && (M < (2 * WideNumElts)) &&
21594 "Out-of-bounds shuffle mask?");
21597 // Does not depend on operands, does not require adjustment.
21598 NewMask.emplace_back(M);
21602 // From which operand of the shuffle does this shuffle mask element pick?
21603 int WideShufOpIdx = M / WideNumElts;
21604 // Which element of that operand is picked?
21605 int OpEltIdx = M % WideNumElts;
21607 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
21608 "Shuffle mask vector decomposition failure.");
21610 // And which NumEltsExtracted-sized subvector of that operand is that?
21611 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
21612 // And which element within that subvector of that operand is that?
21613 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
21615 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
21616 "Shuffle mask subvector decomposition failure.");
21618 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
21619 WideShufOpIdx * WideNumElts) == M &&
21620 "Shuffle mask full decomposition failure.");
21622 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
21624 if (Op.isUndef()) {
21625 // Picking from an undef operand. Let's adjust mask instead.
21626 NewMask.emplace_back(-1);
21630 // Profitability check: only deal with extractions from the first subvector.
21631 if (OpSubvecIdx != 0)
21634 const std::pair<SDValue, int> DemandedSubvector =
21635 std::make_pair(Op, OpSubvecIdx);
21637 if (DemandedSubvectors.insert(DemandedSubvector)) {
21638 if (DemandedSubvectors.size() > 2)
21639 return SDValue(); // We can't handle more than two subvectors.
21640 // How many elements into the WideVT does this subvector start?
21641 int Index = NumEltsExtracted * OpSubvecIdx;
21642 // Bail out if the extraction isn't going to be cheap.
21643 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21647 // Ok, but from which operand of the new shuffle will this element pick?
21649 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector)
21650 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
// Rebase the lane onto the chosen new operand.
21652 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21653 NewMask.emplace_back(AdjM);
21655 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21656 assert(DemandedSubvectors.size() <= 2 &&
21657 "Should have ended up demanding at most two subvectors.");
21659 // Did we discover that the shuffle does not actually depend on operands?
21660 if (DemandedSubvectors.empty())
21661 return DAG.getUNDEF(NarrowVT);
21663 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
21664 // operand[s]/index[es], so there is no point in checking for it's legality.
21666 // Do not turn a legal shuffle into an illegal one.
21667 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21668 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
// Materialize the demanded subvector extractions as new shuffle operands.
21673 SmallVector<SDValue, 2> NewOps;
21674 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21675 &DemandedSubvector : DemandedSubvectors) {
21676 // How many elements into the WideVT does this subvector start?
21677 int Index = NumEltsExtracted * DemandedSubvector.second;
21678 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21679 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21680 DemandedSubvector.first, IndexC));
21682 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21683 "Should end up with either one or two ops");
21685 // If we ended up with only one operand, pad with an undef.
21686 if (NewOps.size() == 1)
21687 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21689 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
// Combine/simplify an EXTRACT_SUBVECTOR node. Tries, in order: undef source,
// narrowing an extracted load, extract-of-extract, extract-of-splat,
// commuting with a bitcast, extract from CONCAT_VECTORS, extract from a
// shuffle, shrinking a BUILD_VECTOR, extract from INSERT_SUBVECTOR,
// narrowing a binop, and finally demanded-elements simplification.
// NOTE(review): several fall-through `return SDValue();` lines and closing
// braces appear elided in this extraction — confirm against upstream.
21692 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// NVT is the narrow result type; V is the wide source; ExtIdx is the
// constant element index where extraction begins.
21693 EVT NVT = N->getValueType(0);
21694 SDValue V = N->getOperand(0);
21695 uint64_t ExtIdx = N->getConstantOperandVal(1);
21697 // Extract from UNDEF is UNDEF.
21699 return DAG.getUNDEF(NVT);
// If a narrow load of NVT is acceptable to the target, try to replace an
// extract-of-load with a smaller load.
21701 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
21702 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21705 // Combine an extract of an extract into a single extract_subvector.
21706 // ext (ext X, C), 0 --> ext X, C
21707 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21708 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21709 V.getConstantOperandVal(1)) &&
21710 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
21711 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21716 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
21717 if (V.getOpcode() == ISD::SPLAT_VECTOR)
21718 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
21719 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
21720 return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
21722 // Try to move vector bitcast after extract_subv by scaling extraction index:
21723 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
21724 if (V.getOpcode() == ISD::BITCAST &&
21725 V.getOperand(0).getValueType().isVector() &&
21726 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21727 SDValue SrcOp = V.getOperand(0);
21728 EVT SrcVT = SrcOp.getValueType();
// Min-num-elements is used so the ratio math is valid for scalable vectors.
21729 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21730 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
// Case 1: the bitcast narrows elements (more src elts than dst elts);
// scale the extract index and element count UP by the ratio.
21731 if ((SrcNumElts % DestNumElts) == 0) {
21732 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21733 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21734 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21736 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21738 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
21739 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21740 V.getOperand(0), NewIndex);
21741 return DAG.getBitcast(NVT, NewExtract);
// Case 2: the bitcast widens elements; scale index/count DOWN, which is
// only exact when both divide evenly by the ratio.
21744 if ((DestNumElts % SrcNumElts) == 0) {
21745 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21746 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21747 ElementCount NewExtEC =
21748 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21749 EVT ScalarVT = SrcVT.getScalarType();
21750 if ((ExtIdx % DestSrcRatio) == 0) {
21752 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21754 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
21755 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21756 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21757 SDValue NewExtract =
21758 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21759 V.getOperand(0), NewIndex);
21760 return DAG.getBitcast(NVT, NewExtract);
// If the scaled extract would produce a single element, use
// EXTRACT_VECTOR_ELT instead of a one-element subvector.
21762 if (NewExtEC.isScalar() &&
21763 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
21764 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21765 SDValue NewExtract =
21766 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
21767 V.getOperand(0), NewIndex);
21768 return DAG.getBitcast(NVT, NewExtract);
// Extract from a concatenation: either pick a whole concat operand or a
// fraction of one.
21775 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21776 unsigned ExtNumElts = NVT.getVectorMinNumElements();
21777 EVT ConcatSrcVT = V.getOperand(0).getValueType();
21778 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21779 "Concat and extract subvector do not change element type");
21780 assert((ExtIdx % ExtNumElts) == 0 &&
21781 "Extract index is not a multiple of the input vector length.");
21783 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21784 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21786 // If the concatenated source types match this extract, it's a direct
21788 // extract_subvec (concat V1, V2, ...), i --> Vi
21789 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21790 return V.getOperand(ConcatOpIdx);
21792 // If the concatenated source vectors are a multiple length of this extract,
21793 // then extract a fraction of one of those source vectors directly from a
21794 // concat operand. Example:
21795 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
21796 // v2i8 extract_subvec v8i8 Y, 6
21797 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21798 ConcatSrcNumElts % ExtNumElts == 0) {
// Rebase the index relative to the chosen concat operand.
21800 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21801 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21802 "Trying to extract from >1 concat operand?");
21803 assert(NewExtIdx % ExtNumElts == 0 &&
21804 "Extract index is not a multiple of the input vector length.");
21805 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21806 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21807 V.getOperand(ConcatOpIdx), NewIndexC);
// Try narrowing an extract of a wide shuffle (helper defined above).
21812 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21815 V = peekThroughBitcasts(V);
21817 // If the input is a build vector. Try to make a smaller build vector.
21818 if (V.getOpcode() == ISD::BUILD_VECTOR) {
21819 EVT InVT = V.getValueType();
21820 unsigned ExtractSize = NVT.getSizeInBits();
21821 unsigned EltSize = InVT.getScalarSizeInBits();
21822 // Only do this if we won't split any elements.
21823 if (ExtractSize % EltSize == 0) {
21824 unsigned NumElems = ExtractSize / EltSize;
21825 EVT EltVT = InVT.getVectorElementType();
// When only one element is extracted the result type is scalar, not vector.
21827 NumElems == 1 ? EltVT
21828 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21829 if ((Level < AfterLegalizeDAG ||
21831 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21832 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21833 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21835 if (NumElems == 1) {
21836 SDValue Src = V->getOperand(IdxVal);
21837 if (EltVT != Src.getValueType())
21838 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
21839 return DAG.getBitcast(NVT, Src);
21842 // Extract the pieces from the original build_vector.
21843 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21844 V->ops().slice(IdxVal, NumElems));
21845 return DAG.getBitcast(NVT, BuildVec);
21850 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21851 // Handle only simple case where vector being inserted and vector
21852 // being extracted are of same size.
21853 EVT SmallVT = V.getOperand(1).getValueType();
21854 if (!NVT.bitsEq(SmallVT))
21858 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21860 // indices are equal or bit offsets are equal => V1
21861 // otherwise => (extract_subvec V1, ExtIdx)
21862 uint64_t InsIdx = V.getConstantOperandVal(2);
// Compare bit offsets (not raw indices) so differing element types match.
21863 if (InsIdx * SmallVT.getScalarSizeInBits() ==
21864 ExtIdx * NVT.getScalarSizeInBits()) {
21865 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21868 return DAG.getBitcast(NVT, V.getOperand(1));
// The insert is at a different offset; extract from the base vector V1.
21870 return DAG.getNode(
21871 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21872 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21876 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
// Last resort: simplify based on which result elements are demanded.
21879 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21880 return SDValue(N, 0);
21885 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21886 /// followed by concatenation. Narrow vector ops may have better performance
21887 /// than wide ops, and this can unlock further narrowing of other vector ops.
21888 /// Targets can invert this transform later if it is not profitable.
/// Pattern: shuffle (concat X, undef), (concat Y, undef), Mask
///      --> concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
/// Returns an empty SDValue if the operands do not match the pattern or the
/// target rejects either half-mask.
21889 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21890 SelectionDAG &DAG) {
// Both operands must be 2-operand concats whose upper halves are undef.
21891 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21892 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21893 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21894 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21897 // Split the wide shuffle mask into halves. Any mask element that is accessing
21898 // operand 1 is offset down to account for narrowing of the vectors.
21899 ArrayRef<int> Mask = Shuf->getMask();
21900 EVT VT = Shuf->getValueType(0);
21901 unsigned NumElts = VT.getVectorNumElements();
21902 unsigned HalfNumElts = NumElts / 2;
// Both half masks start fully undef (-1) and are filled in below.
21903 SmallVector<int, 16> Mask0(HalfNumElts, -1);
21904 SmallVector<int, 16> Mask1(HalfNumElts, -1);
21905 for (unsigned i = 0; i != NumElts; ++i) {
21908 // If we reference the upper (undef) subvector then the element is undef.
21909 if ((Mask[i] % NumElts) >= HalfNumElts)
// Elements that select from operand 1 are rebased into the narrow index
// space (the undef upper halves disappear after narrowing).
21911 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21912 if (i < HalfNumElts)
21915 Mask1[i - HalfNumElts] = M;
21918 // Ask the target if this is a valid transform.
21919 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21920 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21922 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21923 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21926 // shuffle (concat X, undef), (concat Y, undef), Mask -->
21927 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21928 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21930 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21931 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21932 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21935 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21936 // or turn a shuffle of a single concat into simpler shuffle then concat.
// Succeeds only when every NumElemsPerConcat-sized chunk of the mask is an
// exact (or undef) copy of one concat operand; otherwise returns empty.
21937 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21938 EVT VT = N->getValueType(0);
21939 unsigned NumElts = VT.getVectorNumElements();
21941 SDValue N0 = N->getOperand(0);
21942 SDValue N1 = N->getOperand(1);
21943 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21944 ArrayRef<int> Mask = SVN->getMask();
21946 SmallVector<SDValue, 4> Ops;
// ConcatVT is the type of one concat operand; chunks of the mask are
// examined at this granularity.
21947 EVT ConcatVT = N0.getOperand(0).getValueType();
21948 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21949 unsigned NumConcats = NumElts / NumElemsPerConcat;
21951 auto IsUndefMaskElt = [](int i) { return i == -1; };
21953 // Special case: shuffle(concat(A,B)) can be more efficiently represented
21954 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21955 // half vector elements.
21956 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21957 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
// Build a narrow shuffle from the low half of the mask, pad with undef.
21959 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21961 Mask.slice(0, NumElemsPerConcat));
21962 N1 = DAG.getUNDEF(ConcatVT);
21963 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21966 // Look at every vector that's inserted. We're looking for exact
21967 // subvector-sized copies from a concatenated vector
21968 for (unsigned I = 0; I != NumConcats; ++I) {
21969 unsigned Begin = I * NumElemsPerConcat;
21970 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21972 // Make sure we're dealing with a copy.
// An all-undef chunk maps to an undef concat operand.
21973 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21974 Ops.push_back(DAG.getUNDEF(ConcatVT));
// Every defined mask element in the chunk must be an identity lane within
// a single concat operand (same OpIdx for the whole chunk).
21979 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21980 if (IsUndefMaskElt(SubMask[i]))
21982 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21984 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21985 if (0 <= OpIdx && EltOpIdx != OpIdx)
21989 assert(0 <= OpIdx && "Unknown concat_vectors op");
// OpIdx counts across N0's operands first, then N1's.
21991 if (OpIdx < (int)N0.getNumOperands())
21992 Ops.push_back(N0.getOperand(OpIdx))
21994 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21997 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22000 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22001 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22003 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
22004 // a simplification in some sense, but it isn't appropriate in general: some
22005 // BUILD_VECTORs are substantially cheaper than others. The general case
22006 // of a BUILD_VECTOR requires inserting each element individually (or
22007 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
22008 // all constants is a single constant pool load. A BUILD_VECTOR where each
22009 // element is identical is a splat. A BUILD_VECTOR where most of the operands
22010 // are undef lowers to a small number of element insertions.
22012 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
22013 // We don't fold shuffles where one side is a non-zero constant, and we don't
22014 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
22015 // non-constant operands. This seems to work out reasonably well in practice.
22016 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
22018 const TargetLowering &TLI) {
22019 EVT VT = SVN->getValueType(0);
22020 unsigned NumElts = VT.getVectorNumElements();
22021 SDValue N0 = SVN->getOperand(0);
22022 SDValue N1 = SVN->getOperand(1);
// Only fold when this shuffle is the sole user of its source vectors.
22024 if (!N0->hasOneUse())
22027 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
22028 // discussed above.
22029 if (!N1.isUndef()) {
22030 if (!N1->hasOneUse())
22033 bool N0AnyConst = isAnyConstantBuildVector(N0);
22034 bool N1AnyConst = isAnyConstantBuildVector(N1);
22035 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
22037 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
22041 // If both inputs are splats of the same value then we can safely merge this
22042 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
22043 bool IsSplat = false;
22044 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
22045 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
22047 if (SDValue Splat0 = BV0->getSplatValue())
22048 IsSplat = (Splat0 == BV1->getSplatValue());
22050 SmallVector<SDValue, 8> Ops;
// DuplicateOps tracks non-constant scalars already used, to enforce the
// "no duplicate non-constant operands" heuristic below.
22051 SmallSet<SDValue, 16> DuplicateOps;
22052 for (int M : SVN->getMask()) {
// Default each lane to undef; overwritten when the mask selects a real lane.
22053 SDValue Op = DAG.getUNDEF(VT.getScalarType());
// Map the mask element to (source vector, lane index within it).
22055 int Idx = M < (int)NumElts ? M : M - NumElts;
22056 SDValue &S = (M < (int)NumElts ? N0 : N1);
22057 if (S.getOpcode() == ISD::BUILD_VECTOR) {
22058 Op = S.getOperand(Idx);
22059 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// SCALAR_TO_VECTOR defines only lane 0; other lanes are undef.
22060 SDValue Op0 = S.getOperand(0);
22061 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
22063 // Operand can't be combined - bail out.
22068 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
22069 // generating a splat; semantically, this is fine, but it's likely to
22070 // generate low-quality code if the target can't reconstruct an appropriate
22072 if (!Op.isUndef() && !isIntOrFPConstant(Op))
22073 if (!IsSplat && !DuplicateOps.insert(Op).second)
22079 // BUILD_VECTOR requires all inputs to be of the same type, find the
22080 // maximum type and extend them all.
22081 EVT SVT = VT.getScalarType();
22082 if (SVT.isInteger())
22083 for (SDValue &Op : Ops)
22084 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
// Widen every operand to SVT, preferring zext when the target says it's free.
22085 if (SVT != VT.getScalarType())
22086 for (SDValue &Op : Ops)
22087 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
22088 : (TLI.isZExtFree(Op.getValueType(), SVT)
22089 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
22090 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
22091 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
22094 // Match shuffles that can be converted to any_vector_extend_in_reg.
22095 // This is often generated during legalization.
22096 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
22097 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
// Returns the bitcast ANY_EXTEND_VECTOR_INREG replacement, or empty if no
// power-of-2 extension scale matches the mask.
22098 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
22100 const TargetLowering &TLI,
22101 bool LegalOperations) {
22102 EVT VT = SVN->getValueType(0);
22103 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
22105 // TODO Add support for big-endian when we have a test case.
22106 if (!VT.isInteger() || IsBigEndian)
22109 unsigned NumElts = VT.getVectorNumElements();
22110 unsigned EltSizeInBits = VT.getScalarSizeInBits();
22111 ArrayRef<int> Mask = SVN->getMask();
22112 SDValue N0 = SVN->getOperand(0);
22114 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
// A scale-S any-extend mask has element i/S at every position i that is a
// multiple of S; the in-between positions are don't-care (undef).
22115 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
22116 for (unsigned i = 0; i != NumElts; ++i) {
22119 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
22126 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
22127 // power-of-2 extensions as they are the most likely.
22128 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
22129 // Check for non power of 2 vector sizes
22130 if (NumElts % Scale != 0)
22132 if (!isAnyExtend(Scale))
// Result element type is Scale times wider; element count shrinks by Scale.
22135 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
22136 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
22137 // Never create an illegal type. Only create unsupported operations if we
22138 // are pre-legalization.
22139 if (TLI.isTypeLegal(OutVT))
22140 if (!LegalOperations ||
22141 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
22142 return DAG.getBitcast(VT,
22143 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
22144 SDLoc(SVN), OutVT, N0));
22150 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
22151 // each source element of a large type into the lowest elements of a smaller
22152 // destination type. This is often generated during legalization.
22153 // If the source node itself was a '*_extend_vector_inreg' node then we should
22154 // then be able to remove it.
// Returns a bitcast of the pre-extension value when the truncating shuffle
// exactly cancels the extension; empty otherwise.
22155 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
22156 SelectionDAG &DAG) {
22157 EVT VT = SVN->getValueType(0);
22158 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
22160 // TODO Add support for big-endian when we have a test case.
22161 if (!VT.isInteger() || IsBigEndian)
// Look through bitcasts to find the extension node feeding the shuffle.
22164 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
22166 unsigned Opcode = N0.getOpcode();
22167 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
22168 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
22169 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
22172 SDValue N00 = N0.getOperand(0);
22173 ArrayRef<int> Mask = SVN->getMask();
22174 unsigned NumElts = VT.getVectorNumElements();
22175 unsigned EltSizeInBits = VT.getScalarSizeInBits();
22176 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
22177 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
// The extension scale must be integral for the masks below to line up.
22179 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
22181 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
22183 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
22184 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
22185 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
// A scale-S truncate mask selects lane i*S at position i while it stays in
// range; trailing positions are don't-care.
22186 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
22187 for (unsigned i = 0; i != NumElts; ++i) {
22190 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
22197 // At the moment we just handle the case where we've truncated back to the
22198 // same size as before the extension.
22199 // TODO: handle more extension/truncation cases as cases arise.
22200 if (EltSizeInBits != ExtSrcSizeInBits)
22203 // We can remove *extend_vector_inreg only if the truncation happens at
22204 // the same scale as the extension.
22205 if (isTruncate(ExtScale))
22206 return DAG.getBitcast(VT, N00);
22211 // Combine shuffles of splat-shuffles of the form:
22212 // shuffle (shuffle V, undef, splat-mask), undef, M
22213 // If splat-mask contains undef elements, we need to be careful about
22214 // introducing undef's in the folded mask which are not the result of composing
22215 // the masks of the shuffles.
// Returns either the inner splat value/shuffle directly, or a new shuffle
// whose mask is the composition of both masks; empty if no fold applies.
22216 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
22217 SelectionDAG &DAG) {
// Only unary shuffles (RHS undef) are handled.
22218 if (!Shuf->getOperand(1).isUndef())
22221 // If the inner operand is a known splat with no undefs, just return that directly.
22222 // TODO: Create DemandedElts mask from Shuf's mask.
22223 // TODO: Allow undef elements and merge with the shuffle code below.
22224 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
22225 return Shuf->getOperand(0);
22227 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
22228 if (!Splat || !Splat->isSplat())
22231 ArrayRef<int> ShufMask = Shuf->getMask();
22232 ArrayRef<int> SplatMask = Splat->getMask();
22233 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
22235 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
22236 // every undef mask element in the splat-shuffle has a corresponding undef
22237 // element in the user-shuffle's mask or if the composition of mask elements
22238 // would result in undef.
22239 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
22240 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
22241 // In this case it is not legal to simplify to the splat-shuffle because we
22242 // may be exposing the users of the shuffle an undef element at index 1
22243 // which was not there before the combine.
22244 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
22245 // In this case the composition of masks yields SplatMask, so it's ok to
22246 // simplify to the splat-shuffle.
22247 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
22248 // In this case the composed mask includes all undef elements of SplatMask
22249 // and in addition sets element zero to undef. It is safe to simplify to
22250 // the splat-shuffle.
22251 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
22252 ArrayRef<int> SplatMask) {
// Reject only if a defined user lane lands on an undef splat lane while
// the lane it indexes is itself defined (would expose a new undef).
22253 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
22254 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
22255 SplatMask[UserMask[i]] != -1)
22259 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
22260 return Shuf->getOperand(0);
22262 // Create a new shuffle with a mask that is composed of the two shuffles'
// masks: outer undef lanes stay undef, others index through the splat mask.
22264 SmallVector<int, 32> NewMask;
22265 for (int Idx : ShufMask)
22266 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
22268 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
22269 Splat->getOperand(0), Splat->getOperand(1),
22273 // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
22274 // the mask can be treated as a larger type.
// shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask')
// where Mask' is Mask widened by the lane-count ratio; empty on mismatch.
22275 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
22277 const TargetLowering &TLI,
22278 bool LegalOperations) {
22279 SDValue Op0 = SVN->getOperand(0);
22280 SDValue Op1 = SVN->getOperand(1);
22281 EVT VT = SVN->getValueType(0);
22282 if (Op0.getOpcode() != ISD::BITCAST)
// Op1 must be undef or a bitcast from the same pre-cast type as Op0.
22284 EVT InVT = Op0.getOperand(0).getValueType();
22285 if (!InVT.isVector() ||
22286 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
22287 Op1.getOperand(0).getValueType() != InVT)))
// Don't fold constant build vectors (other combines handle those).
22289 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
22290 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
// Only narrow-to-wide (VT has more, evenly divisible, lanes than InVT).
22293 int VTLanes = VT.getVectorNumElements();
22294 int InLanes = InVT.getVectorNumElements();
22295 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
22296 (LegalOperations &&
22297 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
22299 int Factor = VTLanes / InLanes;
22301 // Check that each group of lanes in the mask are either undef or make a valid
22302 // mask for the wider lane type.
22303 ArrayRef<int> Mask = SVN->getMask();
22304 SmallVector<int> NewMask;
22305 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
22308 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
22311 // Create the new shuffle with the new mask and bitcast it back to the
// original result type.
22314 Op0 = Op0.getOperand(0);
22315 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
22316 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
22317 return DAG.getBitcast(VT, NewShuf);
22320 /// Combine shuffle of shuffle of the form:
22321 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
/// Both shuffles must be unary; the composition of the two masks must pick a
/// single source element (a splat). Returns the combined splat shuffle, or
/// empty if the composition is not a splat or the mask is not legal.
22322 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
22323 SelectionDAG &DAG) {
22324 if (!OuterShuf->getOperand(1).isUndef())
22326 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
22327 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
22330 ArrayRef<int> OuterMask = OuterShuf->getMask();
22331 ArrayRef<int> InnerMask = InnerShuf->getMask();
22332 unsigned NumElts = OuterMask.size();
22333 assert(NumElts == InnerMask.size() && "Mask length mismatch");
22334 SmallVector<int, 32> CombinedMask(NumElts, -1);
// SplatIndex records the single source lane every defined output lane must
// resolve to; -1 until the first defined lane is seen.
22335 int SplatIndex = -1;
22336 for (unsigned i = 0; i != NumElts; ++i) {
22337 // Undef lanes remain undef.
22338 int OuterMaskElt = OuterMask[i];
22339 if (OuterMaskElt == -1)
22342 // Peek through the shuffle masks to get the underlying source element.
22343 int InnerMaskElt = InnerMask[OuterMaskElt];
22344 if (InnerMaskElt == -1)
22347 // Initialize the splatted element.
22348 if (SplatIndex == -1)
22349 SplatIndex = InnerMaskElt;
22351 // Non-matching index - this is not a splat.
22352 if (SplatIndex != InnerMaskElt)
22355 CombinedMask[i] = InnerMaskElt;
22357 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
22358 getSplatIndex(CombinedMask) != -1) &&
22359 "Expected a splat mask");
22361 // TODO: The transform may be a win even if the mask is not legal.
22362 EVT VT = OuterShuf->getValueType(0);
22363 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
22364 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
// Shuffle the inner shuffle's sources directly with the composed splat mask.
22367 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
22368 InnerShuf->getOperand(1), CombinedMask);
22371 /// If the shuffle mask is taking exactly one element from the first vector
22372 /// operand and passing through all other elements from the second vector
22373 /// operand, return the index of the mask element that is choosing an element
22374 /// from the first operand. Otherwise, return -1.
/// Example: mask <4,1,6,7> with MaskSize 4 picks op0 only at position 2
/// (value 6 is 2+MaskSize, i.e. op1 lane 2 in place) — wait, here 6 and 7 are
/// identity op1 lanes and 1 is the lone op0 pick; positions must otherwise be
/// identity picks from op1 (Mask[i] == i + MaskSize).
22375 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
22376 int MaskSize = Mask.size();
// Index of the unique op0-sourced lane found so far; -1 while none seen.
22377 int EltFromOp0 = -1;
22378 // TODO: This does not match if there are undef elements in the shuffle mask.
22379 // Should we ignore undefs in the shuffle mask instead? The trade-off is
22380 // removing an instruction (a shuffle), but losing the knowledge that some
22381 // vector lanes are not needed.
22382 for (int i = 0; i != MaskSize; ++i) {
22383 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
22384 // We're looking for a shuffle of exactly one element from operand 0.
// A second op0-sourced lane disqualifies the pattern.
22385 if (EltFromOp0 != -1)
22388 } else if (Mask[i] != i + MaskSize) {
22389 // Nothing from operand 1 can change lanes.
22396 /// If a shuffle inserts exactly one element from a source vector operand into
22397 /// another vector operand and we can access the specified element as a scalar,
22398 /// then we can eliminate the shuffle.
/// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
/// where C' is the shuffle-mask position choosing the op0 element.
22399 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
22400 SelectionDAG &DAG) {
22401 // First, check if we are taking one element of a vector and shuffling that
22402 // element into another vector.
22403 ArrayRef<int> Mask = Shuf->getMask();
22404 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
22405 SDValue Op0 = Shuf->getOperand(0);
22406 SDValue Op1 = Shuf->getOperand(1);
22407 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
22408 if (ShufOp0Index == -1) {
22409 // Commute mask and check again.
22410 ShuffleVectorSDNode::commuteMask(CommutedMask);
22411 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
22412 if (ShufOp0Index == -1)
22414 // Commute operands to match the commuted shuffle mask.
22415 std::swap(Op0, Op1);
22416 Mask = CommutedMask;
22419 // The shuffle inserts exactly one element from operand 0 into operand 1.
22420 // Now see if we can access that element as a scalar via a real insert element
22422 // TODO: We can try harder to locate the element as a scalar. Examples: it
22423 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
22424 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
22425 "Shuffle mask value must be from operand 0");
22426 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
// The insert index must be a constant equal to the lane the shuffle reads,
// so the scalar being inserted is exactly the element the shuffle moves.
22429 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
22430 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
22433 // There's an existing insertelement with constant insertion index, so we
22434 // don't need to check the legality/profitability of a replacement operation
22435 // that differs at most in the constant value. The target should be able to
22436 // lower any of those in a similar way. If not, legalization will expand this
22437 // to a scalar-to-vector plus shuffle.
22439 // Note that the shuffle may move the scalar from the position that the insert
22440 // element used. Therefore, our new insert element occurs at the shuffle's
22441 // mask index value, not the insert's index value.
22442 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
22443 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
22444 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
22445 Op1, Op0.getOperand(1), NewInsIndex);
22448 /// If we have a unary shuffle of a shuffle, see if it can be folded away
22449 /// completely. This has the potential to lose undef knowledge because the first
22450 /// shuffle may not have an undef mask element where the second one does. So
22451 /// only call this after doing simplifications based on demanded elements.
/// Returns the inner shuffle when the outer one is a no-op over it; empty
/// otherwise.
22452 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
22453 // shuf (shuf0 X, Y, Mask0), undef, Mask
22454 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
22455 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
22458 ArrayRef<int> Mask = Shuf->getMask();
22459 ArrayRef<int> Mask0 = Shuf0->getMask();
22460 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
22461 // Ignore undef elements.
22464 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
22466 // Is the element of the shuffle operand chosen by this shuffle the same as
22467 // the element chosen by the shuffle operand itself?
22468 if (Mask0[Mask[i]] != Mask0[i])
22471 // Every element of this shuffle is identical to the result of the previous
22472 // shuffle, so we can replace this value.
22473 return Shuf->getOperand(0);
// Combine an ISD::VECTOR_SHUFFLE node. This is a long chain of independent
// folds -- canonicalizations (undef operands, duplicated operands, splats),
// conversions to BUILD_VECTOR / AND / INSERT_SUBVECTOR, and merging of
// shuffle-of-shuffle chains (directly and through bitcasts or binops). Each
// fold returns early when it fires; falling off the end returns the (elided)
// default.
// NOTE(review): the secondary line numbering in this excerpt skips repeatedly,
// so early-return statements, guard conditions and closing braces are missing
// from view; comments below only describe what the visible lines establish.
22476 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
22477 EVT VT = N->getValueType(0);
22478 unsigned NumElts = VT.getVectorNumElements();
22480 SDValue N0 = N->getOperand(0);
22481 SDValue N1 = N->getOperand(1);
22483 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
22485 // Canonicalize shuffle undef, undef -> undef
22486 if (N0.isUndef() && N1.isUndef())
22487 return DAG.getUNDEF(VT);
22489 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
22491 // Canonicalize shuffle v, v -> v, undef
// (the N0 == N1 guard for this fold is elided in this excerpt)
22493 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
22494 createUnaryMask(SVN->getMask(), NumElts));
22496 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
22498 return DAG.getCommutedVectorShuffle(*SVN);
22500 // Remove references to rhs if it is undef
// Rebuild the mask so no element points into the (undef) RHS; only emit a new
// shuffle if something actually changed (Changed flag; its use is elided).
22501 if (N1.isUndef()) {
22502 bool Changed = false;
22503 SmallVector<int, 8> NewMask;
22504 for (unsigned i = 0; i != NumElts; ++i) {
22505 int Idx = SVN->getMaskElt(i);
22506 if (Idx >= (int)NumElts) {
22510 NewMask.push_back(Idx);
22513 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
22516 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
22519 // A shuffle of a single vector that is a splatted value can always be folded.
22520 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
22523 if (SDValue V = formSplatFromShuffles(SVN, DAG))
22526 // If it is a splat, check if the argument vector is another splat or a
22528 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
22529 int SplatIndex = SVN->getSplatIndex();
// Scalarize a splat of a one-use binop: extract the splatted lane from both
// binop operands, redo the op on scalars, then splat the scalar result.
22530 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
22531 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
22532 // splat (vector_bo L, R), Index -->
22533 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
22534 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
22536 EVT EltVT = VT.getScalarType();
22537 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
22538 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
22539 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
22541 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
22542 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
22543 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
22544 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
22547 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
22548 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
22549 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
22551 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
22552 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
22554 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
22555 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
22556 if (Idx->getAPIntValue() == SplatIndex)
22557 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
22560 // If this is a bit convert that changes the element type of the vector but
22561 // not the number of vector elements, look through it. Be careful not to
22562 // look though conversions that change things like v4f32 to v2f64.
22563 SDNode *V = N0.getNode();
22564 if (V->getOpcode() == ISD::BITCAST) {
22565 SDValue ConvInput = V->getOperand(0);
22566 if (ConvInput.getValueType().isVector() &&
22567 ConvInput.getValueType().getVectorNumElements() == NumElts)
22568 V = ConvInput.getNode();
22571 if (V->getOpcode() == ISD::BUILD_VECTOR) {
22572 assert(V->getNumOperands() == NumElts &&
22573 "BUILD_VECTOR has wrong number of operands");
// First pass: find the first non-undef element (Base); second pass: check
// whether every other element matches it (AllSame bookkeeping; parts elided).
22575 bool AllSame = true;
22576 for (unsigned i = 0; i != NumElts; ++i) {
22577 if (!V->getOperand(i).isUndef()) {
22578 Base = V->getOperand(i);
22582 // Splat of <u, u, u, u>, return <u, u, u, u>
22583 if (!Base.getNode())
22585 for (unsigned i = 0; i != NumElts; ++i) {
22586 if (V->getOperand(i) != Base) {
22591 // Splat of <x, x, x, x>, return <x, x, x, x>
22595 // Canonicalize any other splat as a build_vector.
22596 SDValue Splatted = V->getOperand(SplatIndex);
22597 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
22598 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
22600 // We may have jumped through bitcasts, so the type of the
22601 // BUILD_VECTOR may not match the type of the shuffle.
22602 if (V->getValueType(0) != VT)
22603 NewBV = DAG.getBitcast(VT, NewBV);
22608 // Simplify source operands based on shuffle mask.
22609 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22610 return SDValue(N, 0);
22612 // This is intentionally placed after demanded elements simplification because
22613 // it could eliminate knowledge of undef elements created by this shuffle.
22614 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
22617 // Match shuffles that can be converted to any_vector_extend_in_reg.
22618 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
22621 // Combine "truncate_vector_in_reg" style shuffles.
22622 if (SDValue V = combineTruncationShuffle(SVN, DAG))
22625 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
22626 Level < AfterLegalizeVectorOps &&
22628 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
22629 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
22630 if (SDValue V = partitionShuffleOfConcats(N, DAG))
22634 // A shuffle of a concat of the same narrow vector can be reduced to use
22635 // only low-half elements of a concat with undef:
22636 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
22637 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
22638 N0.getNumOperands() == 2 &&
22639 N0.getOperand(0) == N0.getOperand(1)) {
22640 int HalfNumElts = (int)NumElts / 2;
22641 SmallVector<int, 8> NewMask;
22642 for (unsigned i = 0; i != NumElts; ++i) {
22643 int Idx = SVN->getMaskElt(i);
// References into the duplicated high half are remapped to the low half.
22644 if (Idx >= HalfNumElts) {
22645 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
22646 Idx -= HalfNumElts;
22648 NewMask.push_back(Idx);
22650 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
22651 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
22652 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
22653 N0.getOperand(0), UndefVec);
22654 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
22658 // See if we can replace a shuffle with an insert_subvector.
22659 // e.g. v2i32 into v8i32:
22660 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
22661 // --> insert_subvector(lhs,rhs1,4).
22662 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
22663 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
// Helper: try to match Mask as "identity over LHS with one contiguous span
// replaced by a subvector of the RHS concat"; on success build the
// INSERT_SUBVECTOR, otherwise fall through (failure returns elided).
22664 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
22665 // Ensure RHS subvectors are legal.
22666 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
22667 EVT SubVT = RHS.getOperand(0).getValueType();
22668 int NumSubVecs = RHS.getNumOperands();
22669 int NumSubElts = SubVT.getVectorNumElements();
22670 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
22671 if (!TLI.isTypeLegal(SubVT))
22674 // Don't bother if we have an unary shuffle (matches undef + LHS elts).
22675 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
22678 // Search [NumSubElts] spans for RHS sequence.
22679 // TODO: Can we avoid nested loops to increase performance?
22680 SmallVector<int> InsertionMask(NumElts);
22681 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
22682 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
22683 // Reset mask to identity.
22684 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
22686 // Add subvector insertion.
22687 std::iota(InsertionMask.begin() + SubIdx,
22688 InsertionMask.begin() + SubIdx + NumSubElts,
22689 NumElts + (SubVec * NumSubElts));
22691 // See if the shuffle mask matches the reference insertion mask.
22692 bool MatchingShuffle = true;
22693 for (int i = 0; i != (int)NumElts; ++i) {
22694 int ExpectIdx = InsertionMask[i];
22695 int ActualIdx = Mask[i];
// Undef mask elements (ActualIdx < 0) match anything.
22696 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
22697 MatchingShuffle = false;
22702 if (MatchingShuffle)
22703 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22704 RHS.getOperand(SubVec),
22705 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
// Try the helper with RHS = N1 directly, then with operands commuted so a
// concat on the LHS can also be matched.
22710 ArrayRef<int> Mask = SVN->getMask();
22711 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22712 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22714 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22715 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22716 ShuffleVectorSDNode::commuteMask(CommuteMask);
22717 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
22722 // If we're not performing a select/blend shuffle, see if we can convert the
22723 // shuffle into a AND node, with all the out-of-lane elements are known zero.
22724 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22725 bool IsInLaneMask = true;
22726 ArrayRef<int> Mask = SVN->getMask();
22727 SmallVector<int, 16> ClearMask(NumElts, -1);
22728 APInt DemandedLHS = APInt::getNullValue(NumElts);
22729 APInt DemandedRHS = APInt::getNullValue(NumElts);
22730 for (int I = 0; I != (int)NumElts; ++I) {
// (declaration/guard for per-lane mask value M is elided here)
22734 ClearMask[I] = M == I ? I : (I + NumElts);
22735 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
22737 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
22738 Demanded.setBit(M % NumElts);
22741 // TODO: Should we try to mask with N1 as well?
22742 if (!IsInLaneMask &&
22743 (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) &&
22744 (DemandedLHS.isNullValue() ||
22745 DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
22746 (DemandedRHS.isNullValue() ||
22747 DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
22749 EVT IntVT = VT.changeVectorElementTypeToInteger();
22750 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
22751 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
22752 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
22753 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
22754 for (int I = 0; I != (int)NumElts; ++I)
22756 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
22758 // See if a clear mask is legal instead of going via
22759 // XformToShuffleWithZero which loses UNDEF mask elements.
22760 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
22761 return DAG.getBitcast(
22762 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
22763 DAG.getConstant(0, DL, IntVT), ClearMask));
22765 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
22766 return DAG.getBitcast(
22767 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
22768 DAG.getBuildVector(IntVT, DL, AndMask)));
22772 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22773 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22774 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22775 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22778 // If this shuffle only has a single input that is a bitcasted shuffle,
22779 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22780 // back to their original types.
22781 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22782 N1.isUndef() && Level < AfterLegalizeVectorOps &&
22783 TLI.isTypeLegal(VT)) {
22785 SDValue BC0 = peekThroughOneUseBitcasts(N0);
22786 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22787 EVT SVT = VT.getScalarType();
22788 EVT InnerVT = BC0->getValueType(0);
22789 EVT InnerSVT = InnerVT.getScalarType();
22791 // Determine which shuffle works with the smaller scalar type.
22792 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22793 EVT ScaleSVT = ScaleVT.getScalarType();
// Both element sizes must be exact multiples of the common (smaller) scalar
// size so both masks can be re-expressed at that granularity.
22795 if (TLI.isTypeLegal(ScaleVT) &&
22796 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22797 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22798 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22799 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22801 // Scale the shuffle masks to the smaller scalar type.
22802 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22803 SmallVector<int, 8> InnerMask;
22804 SmallVector<int, 8> OuterMask;
22805 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22806 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22808 // Merge the shuffle masks.
22809 SmallVector<int, 8> NewMask;
22810 for (int M : OuterMask)
22811 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22813 // Test for shuffle mask legality over both commutations.
22814 SDValue SV0 = BC0->getOperand(0);
22815 SDValue SV1 = BC0->getOperand(1);
22816 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
// (the !LegalMask guard for the retry below is elided in this excerpt)
22818 std::swap(SV0, SV1);
22819 ShuffleVectorSDNode::commuteMask(NewMask);
22820 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22824 SV0 = DAG.getBitcast(ScaleVT, SV0);
22825 SV1 = DAG.getBitcast(ScaleVT, SV1);
22826 return DAG.getBitcast(
22827 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22833 // Match shuffles of bitcasts, so long as the mask can be treated as the
22835 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
22838 // Compute the combined shuffle mask for a shuffle with SV0 as the first
22839 // operand, and SV1 as the second operand.
22840 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22841 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
// Lambda used by the two merging folds below. On success it fills SV0/SV1/Mask
// and returns true; failure paths (elided here) leave the caller to try the
// next combination.
22842 auto MergeInnerShuffle =
22843 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22844 ShuffleVectorSDNode *OtherSVN, SDValue N1,
22845 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22846 SmallVectorImpl<int> &Mask) -> bool {
22847 // Don't try to fold splats; they're likely to simplify somehow, or they
22849 if (OtherSVN->isSplat())
22852 SV0 = SV1 = SDValue();
22855 for (unsigned i = 0; i != NumElts; ++i) {
22856 int Idx = SVN->getMaskElt(i);
22858 // Propagate Undef.
22859 Mask.push_back(Idx);
// When Commute is set, flip which half of the index space refers to the
// inner shuffle vs. N1.
22864 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22866 SDValue CurrentVec;
22867 if (Idx < (int)NumElts) {
22868 // This shuffle index refers to the inner shuffle N0. Lookup the inner
22869 // shuffle mask to identify which vector is actually referenced.
22870 Idx = OtherSVN->getMaskElt(Idx);
22872 // Propagate Undef.
22873 Mask.push_back(Idx);
22876 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22877 : OtherSVN->getOperand(1);
22879 // This shuffle index references an element within N1.
22883 // Simple case where 'CurrentVec' is UNDEF.
22884 if (CurrentVec.isUndef()) {
22885 Mask.push_back(-1);
22889 // Canonicalize the shuffle index. We don't know yet if CurrentVec
22890 // will be the first or second operand of the combined shuffle.
22891 Idx = Idx % NumElts;
22892 if (!SV0.getNode() || SV0 == CurrentVec) {
22893 // Ok. CurrentVec is the left hand side.
22894 // Update the mask accordingly.
22896 Mask.push_back(Idx);
22899 if (!SV1.getNode() || SV1 == CurrentVec) {
22900 // Ok. CurrentVec is the right hand side.
22901 // Update the mask accordingly.
22903 Mask.push_back(Idx + NumElts);
22907 // Last chance - see if the vector is another shuffle and if it
22908 // uses one of the existing candidate shuffle ops.
22909 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22910 int InnerIdx = CurrentSVN->getMaskElt(Idx);
22911 if (InnerIdx < 0) {
22912 Mask.push_back(-1);
22915 SDValue InnerVec = (InnerIdx < (int)NumElts)
22916 ? CurrentSVN->getOperand(0)
22917 : CurrentSVN->getOperand(1);
22918 if (InnerVec.isUndef()) {
22919 Mask.push_back(-1);
22922 InnerIdx %= NumElts;
22923 if (InnerVec == SV0) {
22924 Mask.push_back(InnerIdx);
22927 if (InnerVec == SV1) {
22928 Mask.push_back(InnerIdx + NumElts);
22933 // Bail out if we cannot convert the shuffle pair into a single shuffle.
// All-undef merged mask: caller treats this as a trivial success (the exact
// return here is elided in this excerpt).
22937 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22940 // Avoid introducing shuffles with illegal mask.
22941 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22942 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22943 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22944 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22945 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22946 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22947 if (TLI.isShuffleMaskLegal(Mask, VT))
22950 std::swap(SV0, SV1);
22951 ShuffleVectorSDNode::commuteMask(Mask);
22952 return TLI.isShuffleMaskLegal(Mask, VT);
22955 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22956 // Canonicalize shuffles according to rules:
22957 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22958 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22959 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22960 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22961 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22962 // The incoming shuffle must be of the same type as the result of the
22963 // current shuffle.
22964 assert(N1->getOperand(0).getValueType() == VT &&
22965 "Shuffle types don't match");
22967 SDValue SV0 = N1->getOperand(0);
22968 SDValue SV1 = N1->getOperand(1);
22969 bool HasSameOp0 = N0 == SV0;
22970 bool IsSV1Undef = SV1.isUndef();
22971 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22972 // Commute the operands of this shuffle so merging below will trigger.
22973 return DAG.getCommutedVectorShuffle(*SVN);
22976 // Canonicalize splat shuffles to the RHS to improve merging below.
22977 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22978 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22979 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22980 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22981 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22982 return DAG.getCommutedVectorShuffle(*SVN);
22985 // Try to fold according to rules:
22986 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22987 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22988 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22989 // Don't try to fold shuffles with illegal type.
22990 // Only fold if this shuffle is the only user of the other shuffle.
22991 // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well.
22992 for (int i = 0; i != 2; ++i) {
22993 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22994 N->isOnlyUserOf(N->getOperand(i).getNode())) {
22995 // The incoming shuffle must be of the same type as the result of the
22996 // current shuffle.
22997 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22998 assert(OtherSV->getOperand(0).getValueType() == VT &&
22999 "Shuffle types don't match");
// (declarations of SV0/SV1 outputs are elided here)
23002 SmallVector<int, 4> Mask;
23003 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
23005 // Check if all indices in Mask are Undef. In case, propagate Undef.
23006 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
23007 return DAG.getUNDEF(VT);
23009 return DAG.getVectorShuffle(VT, SDLoc(N),
23010 SV0 ? SV0 : DAG.getUNDEF(VT),
23011 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
23016 // Merge shuffles through binops if we are able to merge it with at least
23017 // one other shuffles.
23018 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
23019 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
23020 unsigned SrcOpcode = N0.getOpcode();
23021 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
23023 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
23024 // Get binop source ops, or just pass on the undef.
23025 SDValue Op00 = N0.getOperand(0);
23026 SDValue Op01 = N0.getOperand(1);
23027 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
23028 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
23029 // TODO: We might be able to relax the VT check but we don't currently
23030 // have any isBinOp() that has different result/ops VTs so play safe until
23031 // we have test coverage.
23032 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
23033 Op01.getValueType() == VT && Op11.getValueType() == VT &&
23034 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
23035 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
23036 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
23037 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
23038 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
23039 SmallVectorImpl<int> &Mask, bool LeftOp,
23041 SDValue InnerN = Commute ? N1 : N0;
23042 SDValue Op0 = LeftOp ? Op00 : Op01;
23043 SDValue Op1 = LeftOp ? Op10 : Op11;
23045 std::swap(Op0, Op1);
23046 // Only accept the merged shuffle if we don't introduce undef elements,
23047 // or the inner shuffle already contained undef elements.
23048 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
23049 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
23050 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
23052 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
23053 llvm::none_of(Mask, [](int M) { return M < 0; }));
23056 // Ensure we don't increase the number of shuffles - we must merge a
23057 // shuffle from at least one of the LHS and RHS ops.
23058 bool MergedLeft = false;
23059 SDValue LeftSV0, LeftSV1;
23060 SmallVector<int, 4> LeftMask;
23061 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
23062 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
// (MergedLeft = true; branch bookkeeping partially elided) -- on failure the
// left side keeps the original outer mask and operands.
23065 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
23066 LeftSV0 = Op00, LeftSV1 = Op10;
23069 bool MergedRight = false;
23070 SDValue RightSV0, RightSV1;
23071 SmallVector<int, 4> RightMask;
23072 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
23073 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
23074 MergedRight = true;
23076 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
23077 RightSV0 = Op01, RightSV1 = Op11;
23080 if (MergedLeft || MergedRight) {
23082 SDValue LHS = DAG.getVectorShuffle(
23083 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
23084 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
23085 SDValue RHS = DAG.getVectorShuffle(
23086 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
23087 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
23088 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
23094 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
// Combine an ISD::SCALAR_TO_VECTOR node. The visible fold rewrites
// scalar_to_vector(extract_vector_elt(V, C0)) as a legal VECTOR_SHUFFLE of V
// (possibly followed by an EXTRACT_SUBVECTOR truncate), handling an implicit
// integer truncate first.
// NOTE(review): several interior lines are elided in this excerpt (e.g. the
// statement populating NewMask[0] with Elt, the 'Val =' declaration at 23122,
// and the function's trailing return) -- confirm against the full file.
23100 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
23101 SDValue InVal = N->getOperand(0);
23102 EVT VT = N->getValueType(0);
23104 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
23105 // with a VECTOR_SHUFFLE and possible truncate.
// Only fixed-length vectors: the shuffle-mask rewrite below indexes lanes.
23106 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23107 VT.isFixedLengthVector() &&
23108 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
23109 SDValue InVec = InVal->getOperand(0);
23110 SDValue EltNo = InVal->getOperand(1);
23111 auto InVecT = InVec.getValueType();
// The extract index must be a build-time constant to form the mask.
23112 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
23113 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
23114 int Elt = C0->getZExtValue();
23116 // If we have an implict truncate do truncate here as long as it's legal.
23117 // if it's not legal, this should
23118 if (VT.getScalarType() != InVal.getValueType() &&
23119 InVal.getValueType().isScalarInteger() &&
23120 isTypeLegal(VT.getScalarType())) {
23122 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
23123 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
23125 if (VT.getScalarType() == InVecT.getScalarType() &&
23126 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
23127 SDValue LegalShuffle =
23128 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
23129 DAG.getUNDEF(InVecT), NewMask, DAG);
23130 if (LegalShuffle) {
23131 // If the initial vector is the correct size this shuffle is a
23134 return LegalShuffle;
23135 // If not we must truncate the vector.
23136 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
23137 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
23138 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
23139 InVecT.getVectorElementType(),
23140 VT.getVectorNumElements());
23141 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
23142 LegalShuffle, ZeroIdx);
// Combine an ISD::INSERT_SUBVECTOR node (insert N1 into N0 at constant lane
// index N2). A sequence of independent folds: undef-insert elimination,
// bitcast hoisting, merging/reordering of chained inserts, rewriting an insert
// into a concat, and demanded-elements simplification.
// NOTE(review): secondary numbering skips show elided lines (e.g. the
// N1.isUndef() body at 23159-23162, the second operand of the fold at
// 23212-23214, and the trailing return) -- confirm against the full file.
23152 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
23153 EVT VT = N->getValueType(0);
23154 SDValue N0 = N->getOperand(0);
23155 SDValue N1 = N->getOperand(1);
23156 SDValue N2 = N->getOperand(2);
23157 uint64_t InsIdx = N->getConstantOperandVal(2);
23159 // If inserting an UNDEF, just return the original vector.
23163 // If this is an insert of an extracted vector into an undef vector, we can
23164 // just use the input to the extract.
23165 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
23166 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
23167 return N1.getOperand(0);
23169 // Simplify scalar inserts into an undef vector:
23170 // insert_subvector undef, (splat X), N2 -> splat X
23171 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
23172 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0))
23174 // If we are inserting a bitcast value into an undef, with the same
23175 // number of elements, just use the bitcast input of the extract.
23176 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
23177 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
23178 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
23179 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
23180 N1.getOperand(0).getOperand(1) == N2 &&
23181 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
23182 VT.getVectorElementCount() &&
23183 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
23184 VT.getSizeInBits()) {
23185 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
23188 // If both N1 and N2 are bitcast values on which insert_subvector
23189 // would makes sense, pull the bitcast through.
23190 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
23191 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
23192 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
23193 SDValue CN0 = N0.getOperand(0);
23194 SDValue CN1 = N1.getOperand(0);
23195 EVT CN0VT = CN0.getValueType();
23196 EVT CN1VT = CN1.getValueType();
// Both pre-bitcast types must be vectors with matching element type and the
// outer element count so the insert stays well-formed.
23197 if (CN0VT.isVector() && CN1VT.isVector() &&
23198 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
23199 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
23200 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
23201 CN0.getValueType(), CN0, CN1, N2);
23202 return DAG.getBitcast(VT, NewINSERT);
23206 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
23207 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
23208 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
23209 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
23210 N0.getOperand(1).getValueType() == N1.getValueType() &&
23211 N0.getOperand(2) == N2)
23212 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
23215 // Eliminate an intermediate insert into an undef vector:
23216 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
23217 // insert_subvector undef, X, N2
23218 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
23219 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
23220 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
23221 N1.getOperand(1), N2);
23223 // Push subvector bitcasts to the output, adjusting the index as we go.
23224 // insert_subvector(bitcast(v), bitcast(s), c1)
23225 // -> bitcast(insert_subvector(v, s, c2))
23226 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
23227 N1.getOpcode() == ISD::BITCAST) {
23228 SDValue N0Src = peekThroughBitcasts(N0);
23229 SDValue N1Src = peekThroughBitcasts(N1);
23230 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
23231 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
23232 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
23233 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
// (declarations of NewVT / NewIdx / DL are elided in this excerpt)
23237 LLVMContext &Ctx = *DAG.getContext();
23238 ElementCount NumElts = VT.getVectorElementCount();
23239 unsigned EltSizeInBits = VT.getScalarSizeInBits();
// Rescale the insert index: narrower source scalars multiply the lane count
// and index; wider source scalars divide them (only when evenly divisible).
23240 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
23241 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
23242 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
23243 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
23244 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
23245 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
23246 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
23247 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
23248 NumElts.divideCoefficientBy(Scale));
23249 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
23252 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
23253 SDValue Res = DAG.getBitcast(NewVT, N0Src);
23254 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
23255 return DAG.getBitcast(VT, Res);
23260 // Canonicalize insert_subvector dag nodes.
23262 // (insert_subvector (insert_subvector A, Idx0), Idx1)
23263 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
// Sort chained inserts by ascending index; requires a one-use inner insert of
// the same subvector type.
23264 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
23265 N1.getValueType() == N0.getOperand(1).getValueType()) {
23266 unsigned OtherIdx = N0.getConstantOperandVal(2);
23267 if (InsIdx < OtherIdx) {
23269 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
23270 N0.getOperand(0), N1, N2);
23271 AddToWorklist(NewOp.getNode());
23272 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
23273 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
23277 // If the input vector is a concatenation, and the insert replaces
23278 // one of the pieces, we can optimize into a single concat_vectors.
23279 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
23280 N0.getOperand(0).getValueType() == N1.getValueType() &&
23281 N0.getOperand(0).getValueType().isScalableVector() ==
23282 N1.getValueType().isScalableVector()) {
23283 unsigned Factor = N1.getValueType().getVectorMinNumElements();
23284 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
23285 Ops[InsIdx / Factor] = N1;
23286 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23289 // Simplify source operands based on insertion.
23290 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
23291 return SDValue(N, 0);
// Combine an ISD::FP_TO_FP16 node: a round-trip through FP16_TO_FP cancels
// out, yielding the original half-as-i16 value.
// NOTE(review): the fallthrough (presumably 'return SDValue();') and closing
// brace are elided in this excerpt.
23296 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
23297 SDValue N0 = N->getOperand(0);
23299 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
23300 if (N0->getOpcode() == ISD::FP16_TO_FP)
23301 return N0->getOperand(0);
// Combine an ISD::FP16_TO_FP node: the conversion only reads the low 16 bits
// of its input, so a masking 'and op, 0xffff' is redundant and can be dropped
// -- unless the target asks to keep the zero-extension
// (TLI.shouldKeepZExtForFP16Conv()).
// NOTE(review): the rebuilt node's operand at 23313 (presumably
// N0.getOperand(0)), the closing braces and the trailing return are elided in
// this excerpt.
23306 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
23307 SDValue N0 = N->getOperand(0);
23309 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
23310 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
23311 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
23312 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
23313 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
// Combine an ISD::FP_TO_BF16 node: a round-trip through BF16_TO_FP cancels
// out, mirroring the FP16 fold above.
// NOTE(review): the fallthrough (presumably 'return SDValue();') and closing
// brace are elided in this excerpt.
23321 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
23322 SDValue N0 = N->getOperand(0);
23324 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
23325 if (N0->getOpcode() == ISD::BF16_TO_FP)
23326 return N0->getOperand(0);
// Combine an ISD::VECREDUCE_* node: scalarize single-element reductions,
// rewrite boolean AND/OR reductions as UMIN/UMAX when only the latter are
// legal, and peel an insert_subvector whose surrounding lanes are the
// reduction's identity (zero for OR, all-ones for AND).
// NOTE(review): a few interior lines are elided in this excerpt (the SDLoc/Res
// declarations around 23338-23339, the 'return Res;' near 23344, and the
// trailing return) -- confirm against the full file.
23331 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
23332 SDValue N0 = N->getOperand(0);
23333 EVT VT = N0.getValueType();
23334 unsigned Opcode = N->getOpcode();
23336 // VECREDUCE over 1-element vector is just an extract.
23337 if (VT.getVectorElementCount().isScalar()) {
23340 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
23341 DAG.getVectorIdxConstant(0, dl));
// The reduction's result type may be wider than the element type; extend.
23342 if (Res.getValueType() != N->getValueType(0))
23343 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
23347 // On an boolean vector an and/or reduction is the same as a umin/umax
23348 // reduction. Convert them if the latter is legal while the former isn't.
23349 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
23350 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
23351 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
// ComputeNumSignBits == bit width means every lane is all-zeros or all-ones,
// i.e. a boolean vector, so min/max coincide with and/or.
23352 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
23353 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
23354 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
23355 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
23358 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
23359 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
23360 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
23361 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
23362 SDValue Vec = N0.getOperand(0);
23363 SDValue Subvec = N0.getOperand(1);
23364 if ((Opcode == ISD::VECREDUCE_OR &&
23365 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
23366 (Opcode == ISD::VECREDUCE_AND &&
23367 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
23368 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
/// Generic combine for vector-predicated (VP) nodes: if every lane is proven
/// disabled (EVL == 0 or an all-false mask), the operation is a no-op and can
/// be replaced by undef / its chain / its start value as appropriate.
23374 SDValue DAGCombiner::visitVPOp(SDNode *N) {
23375 // VP operations in which all vector elements are disabled - either by
23376 // determining that the mask is all false or that the EVL is 0 - can be
23378 bool AreAllEltsDisabled = false;
// Either condition alone is sufficient, hence the |= accumulation.
23379 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
23380 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
23381 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
23382 AreAllEltsDisabled |=
23383 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
23385 // This is the only generic VP combine we support for now.
23386 if (!AreAllEltsDisabled)
23389 // Binary operations can be replaced by UNDEF.
23390 if (ISD::isVPBinaryOp(N->getOpcode()))
23391 return DAG.getUNDEF(N->getValueType(0));
23393 // VP Memory operations can be replaced by either the chain (stores) or the
23394 // chain + undef (loads).
23395 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
23396 if (MemSD->writeMem())
23397 return MemSD->getChain();
// Loads produce two results (value, chain); CombineTo replaces both.
23398 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
23401 // Reduction operations return the start operand when no elements are active.
23402 if (ISD::isVPReduction(N->getOpcode()))
23403 return N->getOperand(0);
23408 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
23409 /// with the destination vector and a zero vector.
23410 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
23411 /// vector_shuffle V, Zero, <0, 4, 2, 4>
23412 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
23413 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
23415 EVT VT = N->getValueType(0);
23416 SDValue LHS = N->getOperand(0);
// Peek through bitcasts so a mask built in a different element width (e.g.
// a v2i64 constant over v4i32 data) is still recognized.
23417 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
23420 // Make sure we're not running after operation legalization where it
23421 // may have custom lowered the vector shuffles.
23422 if (LegalOperations)
// The mask must be a constant BUILD_VECTOR to inspect element by element.
23425 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
23428 EVT RVT = RHS.getValueType();
23429 unsigned NumElts = RHS.getNumOperands();
23431 // Attempt to create a valid clear mask, splitting the mask into
23432 // sub elements and checking to see if each is
23433 // all zeros or all ones - suitable for shuffle masking.
// Split == 1 tries whole elements; larger Split values retry at narrower
// sub-element widths (down to bytes) when a full element is mixed.
23434 auto BuildClearMask = [&](int Split) {
23435 int NumSubElts = NumElts * Split;
23436 int NumSubBits = RVT.getScalarSizeInBits() / Split;
23438 SmallVector<int, 8> Indices;
23439 for (int i = 0; i != NumSubElts; ++i) {
23440 int EltIdx = i / Split;
23441 int SubIdx = i % Split;
23442 SDValue Elt = RHS.getOperand(EltIdx);
23443 // X & undef --> 0 (not undef). So this lane must be converted to choose
23444 // from the zero constant vector (same as if the element had all 0-bits).
// Shuffle index >= NumSubElts selects from the second (zero) operand.
23445 if (Elt.isUndef()) {
23446 Indices.push_back(i + NumSubElts);
// The mask element may be an integer or an FP constant; either way we work
// on its raw bit pattern.
23451 if (isa<ConstantSDNode>(Elt))
23452 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
23453 else if (isa<ConstantFPSDNode>(Elt))
23454 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
23458 // Extract the sub element from the constant bit mask.
// Sub-element order within an element depends on endianness.
23459 if (DAG.getDataLayout().isBigEndian())
23460 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
23462 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
// All-ones keeps the LHS lane; all-zeros selects the zero vector; anything
// mixed makes this Split level unusable.
23464 if (Bits.isAllOnes())
23465 Indices.push_back(i);
23466 else if (Bits == 0)
23467 Indices.push_back(i + NumSubElts);
23472 // Let's see if the target supports this vector_shuffle.
23473 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
23474 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
23475 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
// Shuffle (bitcast LHS) against zero, then bitcast back to the result type.
23478 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
23479 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
23480 DAG.getBitcast(ClearVT, LHS),
23484 // Determine maximum split level (byte level masking).
23486 if (RVT.getScalarSizeInBits() % 8 == 0)
23487 MaxSplit = RVT.getScalarSizeInBits() / 8;
// Try coarsest-to-finest granularity; first legal clear mask wins.
23489 for (int Split = 1; Split <= MaxSplit; ++Split)
23490 if (RVT.getScalarSizeInBits() % Split == 0)
23491 if (SDValue S = BuildClearMask(Split))
23497 /// If a vector binop is performed on splat values, it may be profitable to
23498 /// extract, scalarize, and insert/splat.
// I.e. bo (splat X), (splat Y) --> splat (bo X, Y): do the arithmetic once
// on scalars instead of across the whole vector.
23499 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
23501 SDValue N0 = N->getOperand(0);
23502 SDValue N1 = N->getOperand(1);
23503 unsigned Opcode = N->getOpcode();
23504 EVT VT = N->getValueType(0);
23505 EVT EltVT = VT.getVectorElementType();
23506 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23508 // TODO: Remove/replace the extract cost check? If the elements are available
23509 // as scalars, then there may be no extract cost. Should we ask if
23510 // inserting a scalar back into a vector is cheap instead?
23511 int Index0, Index1;
// getSplatSourceVector reports the vector a splat is built from and which
// lane is splatted; both operands must splat the same lane.
23512 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
23513 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
23514 // Extract element from splat_vector should be free.
23515 // TODO: use DAG.isSplatValue instead?
23516 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
23517 N1.getOpcode() == ISD::SPLAT_VECTOR;
// Bail out unless: both sides are splats of the same index, element types
// line up, the extract is free (or both are SPLAT_VECTOR), and the scalar
// form of the opcode is available.
23518 if (!Src0 || !Src1 || Index0 != Index1 ||
23519 Src0.getValueType().getVectorElementType() != EltVT ||
23520 Src1.getValueType().getVectorElementType() != EltVT ||
23521 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
23522 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
// Perform the operation on the two extracted scalars, keeping N's flags.
23525 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
23526 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
23527 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
23528 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
23530 // If all lanes but 1 are undefined, no need to splat the scalar result.
23531 // TODO: Keep track of undefs and use that info in the general case.
23532 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
23533 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
23534 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
23535 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
23536 // build_vec ..undef, (bo X, Y), undef...
23537 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
23538 Ops[Index0] = ScalarBO;
23539 return DAG.getBuildVector(VT, DL, Ops);
23542 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
// Scalable vectors must use SPLAT_VECTOR; fixed vectors can use a
// BUILD_VECTOR with the scalar repeated in every lane.
23543 if (VT.isScalableVector())
23544 return DAG.getSplatVector(VT, DL, ScalarBO);
23545 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
23546 return DAG.getBuildVector(VT, DL, Ops);
23549 /// Visit a binary vector operation, like ADD.
// Shared simplification entry point for vector binops: sinks identical
// shuffles, splat shuffles with a constant operand, INSERT_SUBVECTOR pairs,
// and CONCAT_VECTORS pairs below the binop, then tries splat scalarization.
23550 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
23551 EVT VT = N->getValueType(0);
23552 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
23554 SDValue LHS = N->getOperand(0);
23555 SDValue RHS = N->getOperand(1);
23556 unsigned Opcode = N->getOpcode();
23557 SDNodeFlags Flags = N->getFlags();
23559 // Move unary shuffles with identical masks after a vector binop:
23560 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
23561 // --> shuffle (VBinOp A, B), Undef, Mask
23562 // This does not require type legality checks because we are creating the
23563 // same types of operations that are in the original sequence. We do have to
23564 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
23565 // though. This code is adapted from the identical transform in instcombine.
// Division/remainder are excluded: the shuffle may move a zero divisor into
// a lane that the original op never divided by.
23566 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
23567 Opcode != ISD::UREM && Opcode != ISD::SREM &&
23568 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
23569 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
23570 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
// One-use (or LHS == RHS) keeps this from duplicating the shuffled values.
23571 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
23572 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
23573 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
23574 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
23575 RHS.getOperand(0), Flags);
23576 SDValue UndefV = LHS.getOperand(1);
23577 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
23580 // Try to sink a splat shuffle after a binop with a uniform constant.
23581 // This is limited to cases where neither the shuffle nor the constant have
23582 // undefined elements because that could be poison-unsafe or inhibit
23583 // demanded elements analysis. It is further limited to not change a splat
23584 // of an inserted scalar because that may be optimized better by
23585 // load-folding or other target-specific behaviors.
23586 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
23587 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
23588 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23589 // binop (splat X), (splat C) --> splat (binop X, C)
23590 SDValue X = Shuf0->getOperand(0);
23591 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
23592 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
// Mirror image of the previous fold with the constant on the left; operand
// order is preserved (LHS stays first) for non-commutative opcodes.
23595 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
23596 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
23597 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23598 // binop (splat C), (splat X) --> splat (binop C, X)
23599 SDValue X = Shuf1->getOperand(0);
23600 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
23601 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23606 // The following pattern is likely to emerge with vector reduction ops. Moving
23607 // the binary operation ahead of insertion may allow using a narrower vector
23608 // instruction that has better performance than the wide version of the op:
23609 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
23610 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
23611 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
23612 LHS.getOperand(2) == RHS.getOperand(2) &&
23613 (LHS.hasOneUse() || RHS.hasOneUse())) {
23614 SDValue X = LHS.getOperand(1);
23615 SDValue Y = RHS.getOperand(1);
23616 SDValue Z = LHS.getOperand(2);
23617 EVT NarrowVT = X.getValueType();
23618 if (NarrowVT == Y.getValueType() &&
23619 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
23620 LegalOperations)) {
23621 // (binop undef, undef) may not return undef, so compute that result.
23623 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
23624 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
23625 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
23629 // Make sure all but the first op are undef or constant.
23630 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
23631 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
23632 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
23633 return Op.isUndef() ||
23634 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
23638 // The following pattern is likely to emerge with vector reduction ops. Moving
23639 // the binary operation ahead of the concat may allow using a narrower vector
23640 // instruction that has better performance than the wide version of the op:
23641 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
23642 // concat (VBinOp X, Y), VecC
23643 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
23644 (LHS.hasOneUse() || RHS.hasOneUse())) {
23645 EVT NarrowVT = LHS.getOperand(0).getValueType();
23646 if (NarrowVT == RHS.getOperand(0).getValueType() &&
23647 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
23648 unsigned NumOperands = LHS.getNumOperands();
23649 SmallVector<SDValue, 4> ConcatOps;
// Operand 0 gets the real narrow binop; operands 1+ are all undef/constant
// per the predicate above, so these nodes constant-fold.
23650 for (unsigned i = 0; i != NumOperands; ++i) {
23651 // This constant fold for operands 1 and up.
23652 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
23653 RHS.getOperand(i)));
23656 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
// Last resort: if both operands are splats, do the math once on scalars.
23660 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
/// Simplify a SELECT whose condition is a SETCC by routing it through
/// SimplifySelectCC. If that produces a SELECT_CC, decompose it back into a
/// SETCC + SELECT pair (the form the caller expects); any other node (e.g. an
/// fabs) is returned as-is.
23666 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
23668 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
23670 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
23671 cast<CondCodeSDNode>(N0.getOperand(2))->get());
23673 // If we got a simplified select_cc node back from SimplifySelectCC, then
23674 // break it down into a new SETCC node, and a new SELECT node, and then return
23675 // the SELECT node, since we were called with a SELECT node.
23676 if (SCC.getNode()) {
23677 // Check to see if we got a select_cc back (to turn into setcc/select).
23678 // Otherwise, just return whatever node we got back, like fabs.
23679 if (SCC.getOpcode() == ISD::SELECT_CC) {
// Propagate the original condition's flags onto both new nodes.
23680 const SDNodeFlags Flags = N0->getFlags();
// SELECT_CC operands: (lhs, rhs, trueval, falseval, condcode).
23681 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
23683 SCC.getOperand(0), SCC.getOperand(1),
23684 SCC.getOperand(4), Flags);
23685 AddToWorklist(SETCC.getNode());
23686 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
23687 SCC.getOperand(2), SCC.getOperand(3));
23688 SelectNode->setFlags(Flags);
23697 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
23698 /// being selected between, see if we can simplify the select. Callers of this
23699 /// should assume that TheSelect is deleted if this returns true. As such, they
23700 /// should return the appropriate thing (e.g. the node) back to the top-level of
23701 /// the DAG combiner loop to avoid it being looked at.
// Two independent folds live here: (1) select of NaN vs fsqrt(x) guarded by
// x < 0 collapses to the fsqrt (it already yields NaN for negative input);
// (2) select of two similar loads becomes one load of a selected address.
23702 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
23704 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23705 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
23706 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
23707 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
23708 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
23709 SDValue Sqrt = RHS;
23712 const ConstantFPSDNode *Zero = nullptr;
// Dig the compare operands and cond-code out of either a SELECT_CC (inline
// operands) or a SELECT/VSELECT over a SETCC condition.
23714 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
23715 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
23716 CmpLHS = TheSelect->getOperand(0);
23717 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
23719 // SELECT or VSELECT
23720 SDValue Cmp = TheSelect->getOperand(0);
23721 if (Cmp.getOpcode() == ISD::SETCC) {
23722 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
23723 CmpLHS = Cmp.getOperand(0);
23724 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
// The compare must be exactly "sqrt's operand < +/-0.0" (any lt flavor).
23727 if (Zero && Zero->isZero() &&
23728 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
23729 CC == ISD::SETULT || CC == ISD::SETLT)) {
23730 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23731 CombineTo(TheSelect, Sqrt);
23736 // Cannot simplify select with vector condition
23737 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
23739 // If this is a select from two identical things, try to pull the operation
23740 // through the select.
23741 if (LHS.getOpcode() != RHS.getOpcode() ||
23742 !LHS.hasOneUse() || !RHS.hasOneUse())
23745 // If this is a load and the token chain is identical, replace the select
23746 // of two loads with a load through a select of the address to load from.
23747 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
23748 // constants have been dropped into the constant pool.
23749 if (LHS.getOpcode() == ISD::LOAD) {
23750 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
23751 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
23753 // Token chains must be identical.
// One big legality filter; each clause documents its own reason below.
23754 if (LHS.getOperand(0) != RHS.getOperand(0) ||
23755 // Do not let this transformation reduce the number of volatile loads.
23756 // Be conservative for atomics for the moment
23757 // TODO: This does appear to be legal for unordered atomics (see D66309)
23758 !LLD->isSimple() || !RLD->isSimple() ||
23759 // FIXME: If either is a pre/post inc/dec load,
23760 // we'd need to split out the address adjustment.
23761 LLD->isIndexed() || RLD->isIndexed() ||
23762 // If this is an EXTLOAD, the VT's must match.
23763 LLD->getMemoryVT() != RLD->getMemoryVT() ||
23764 // If this is an EXTLOAD, the kind of extension must match.
23765 (LLD->getExtensionType() != RLD->getExtensionType() &&
23766 // The only exception is if one of the extensions is anyext.
23767 LLD->getExtensionType() != ISD::EXTLOAD &&
23768 RLD->getExtensionType() != ISD::EXTLOAD) ||
23769 // FIXME: this discards src value information. This is
23770 // over-conservative. It would be beneficial to be able to remember
23771 // both potential memory locations. Since we are discarding
23772 // src value info, don't do the transformation if the memory
23773 // locations are not in the default address space.
23774 LLD->getPointerInfo().getAddrSpace() != 0 ||
23775 RLD->getPointerInfo().getAddrSpace() != 0 ||
23776 // We can't produce a CMOV of a TargetFrameIndex since we won't
23777 // generate the address generation required.
23778 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23779 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23780 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
23781 LLD->getBasePtr().getValueType()))
23784 // The loads must not depend on one another.
23785 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
23788 // Check that the select condition doesn't reach either load. If so,
23789 // folding this will induce a cycle into the DAG. If not, this is safe to
23790 // xform, so create a select of the addresses.
23792 SmallPtrSet<const SDNode *, 32> Visited;
23793 SmallVector<const SDNode *, 16> Worklist;
23795 // Always fail if LLD and RLD are not independent. TheSelect is a
23796 // predecessor to all Nodes in question so we need not search past it.
23798 Visited.insert(TheSelect);
23799 Worklist.push_back(LLD);
23800 Worklist.push_back(RLD);
23802 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23803 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23807 if (TheSelect->getOpcode() == ISD::SELECT) {
23808 // We cannot do this optimization if any pair of {RLD, LLD} is a
23809 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23810 // Loads, we only need to check if CondNode is a successor to one of the
23811 // loads. We can further avoid this if there's no use of their chain
23813 SDNode *CondNode = TheSelect->getOperand(0).getNode();
23814 Worklist.push_back(CondNode);
// Visited/Worklist are intentionally reused: earlier exploration state
// carries over, so only the newly added condition node costs extra work.
23816 if ((LLD->hasAnyUseOfValue(1) &&
23817 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23818 (RLD->hasAnyUseOfValue(1) &&
23819 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
// Select between the two base pointers under the original condition.
23822 Addr = DAG.getSelect(SDLoc(TheSelect),
23823 LLD->getBasePtr().getValueType(),
23824 TheSelect->getOperand(0), LLD->getBasePtr(),
23825 RLD->getBasePtr());
23826 } else { // Otherwise SELECT_CC
23827 // We cannot do this optimization if any pair of {RLD, LLD} is a
23828 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23829 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23830 // one of the loads. We can further avoid this if there's no use of their
23833 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23834 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23835 Worklist.push_back(CondLHS);
23836 Worklist.push_back(CondRHS);
23838 if ((LLD->hasAnyUseOfValue(1) &&
23839 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23840 (RLD->hasAnyUseOfValue(1) &&
23841 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23844 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23845 LLD->getBasePtr().getValueType(),
23846 TheSelect->getOperand(0),
23847 TheSelect->getOperand(1),
23848 LLD->getBasePtr(), RLD->getBasePtr(),
23849 TheSelect->getOperand(4));
23853 // It is safe to replace the two loads if they have different alignments,
23854 // but the new load must be the minimum (most restrictive) alignment of the
// Also drop invariant/dereferenceable MMO flags unless BOTH loads have them.
23856 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
23857 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23858 if (!RLD->isInvariant())
23859 MMOFlags &= ~MachineMemOperand::MOInvariant;
23860 if (!RLD->isDereferenceable())
23861 MMOFlags &= ~MachineMemOperand::MODereferenceable;
// Plain load vs extending load need different construction APIs; the
// earlier filter guarantees the two extension kinds are compatible.
23862 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23863 // FIXME: Discards pointer and AA info.
23864 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23865 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23868 // FIXME: Discards pointer and AA info.
23869 Load = DAG.getExtLoad(
23870 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23871 : LLD->getExtensionType(),
23872 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23873 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23876 // Users of the select now use the result of the load.
23877 CombineTo(TheSelect, Load);
23879 // Users of the old loads now use the new load's chain. We know the
23880 // old-load value is dead now.
23881 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23882 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23889 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
// ... an AND ("gzip trick"): a sign-bit test can be materialized as an
// all-ones/all-zero mask via an arithmetic shift, then ANDed with N2.
23891 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23892 SDValue N1, SDValue N2, SDValue N3,
23893 ISD::CondCode CC) {
23894 // If this is a select where the false operand is zero and the compare is a
23895 // check of the sign bit, see if we can perform the "gzip trick":
23896 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23897 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
23898 EVT XType = N0.getValueType();
23899 EVT AType = N2.getValueType();
// Only applies when the false value is 0 and X is at least as wide as A.
23900 if (!isNullConstant(N3) || !XType.bitsGE(AType))
23903 // If the comparison is testing for a positive value, we have to invert
23904 // the sign bit mask, so only do that transform if the target has a bitwise
23905 // 'and not' instruction (the invert is free).
23906 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23907 // (X > -1) ? A : 0
23908 // (X > 0) ? X : 0 <-- This is canonical signed max.
23909 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23911 } else if (CC == ISD::SETLT) {
23913 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
23914 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23920 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
// Power-of-two A special case: a logical shift that lands A's single bit in
// place is enough -- no all-ones mask needed.
23922 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23923 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
// (v & (v-1)) == 0 is the classic power-of-two-or-zero test.
23924 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23925 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
// The target may veto shifts of this width/amount (e.g. expensive shifts).
23926 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23927 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23928 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23929 AddToWorklist(Shift.getNode());
// Narrow the shifted value down to A's type if X was wider.
23931 if (XType.bitsGT(AType)) {
23932 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23933 AddToWorklist(Shift.getNode());
// SETGT selected the "positive" arm, so the mask must be inverted (andnot).
23936 if (CC == ISD::SETGT)
23937 Shift = DAG.getNOT(DL, Shift, AType);
23939 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
// General case: arithmetic shift by width-1 smears the sign bit into a
// full all-ones (negative) or all-zeros (non-negative) mask.
23943 unsigned ShCt = XType.getSizeInBits() - 1;
23944 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23947 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23948 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23949 AddToWorklist(Shift.getNode());
23951 if (XType.bitsGT(AType)) {
23952 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23953 AddToWorklist(Shift.getNode());
23956 if (CC == ISD::SETGT)
23957 Shift = DAG.getNOT(DL, Shift, AType);
23959 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23962 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
// When both select arms are the same binop sharing one operand, hoist the
// binop above the select so only the differing operand is selected.
23963 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23964 SDValue N0 = N->getOperand(0);
23965 SDValue N1 = N->getOperand(1);
23966 SDValue N2 = N->getOperand(2);
23967 EVT VT = N->getValueType(0);
// Both arms must use the identical binop opcode.
23970 unsigned BinOpc = N1.getOpcode();
23971 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23974 // The use checks are intentionally on SDNode because we may be dealing
23975 // with opcodes that produce more than one SDValue.
23976 // TODO: Do we really need to check N0 (the condition operand of the select)?
23977 // But removing that clause could cause an infinite loop...
23978 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23981 // Binops may include opcodes that return multiple values, so all values
23982 // must be created/propagated from the newly created binops below.
23983 SDVTList OpVTs = N1->getVTList();
23985 // Fold select(cond, binop(x, y), binop(z, y))
23986 // --> binop(select(cond, x, z), y)
23987 if (N1.getOperand(1) == N2.getOperand(1)) {
23989 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23990 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
// The new binop may only keep flags both originals guaranteed: seed with
// N1's flags, then intersect with N2's.
23991 NewBinOp->setFlags(N1->getFlags());
23992 NewBinOp->intersectFlagsWith(N2->getFlags());
23996 // Fold select(cond, binop(x, y), binop(x, z))
23997 // --> binop(x, select(cond, y, z))
23998 // Second op VT might be different (e.g. shift amount type)
// Hence the extra VT equality checks before selecting on operand 1.
23999 if (N1.getOperand(0) == N2.getOperand(0) &&
24000 VT == N1.getOperand(1).getValueType() &&
24001 VT == N2.getOperand(1).getValueType()) {
24003 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
24004 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
24005 NewBinOp->setFlags(N1->getFlags());
24006 NewBinOp->intersectFlagsWith(N2->getFlags());
24010 // TODO: Handle isCommutativeBinOp patterns as well?
24014 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
// fneg becomes an integer XOR with the sign mask; fabs becomes an AND with
// its complement -- done on the pre-bitcast integer value.
24015 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
24016 SDValue N0 = N->getOperand(0);
24017 EVT VT = N->getValueType(0);
24018 bool IsFabs = N->getOpcode() == ISD::FABS;
24019 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
// Skip if the FP op is already free on this target, or there is no
// single-use bitcast to look through.
24021 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
24024 SDValue Int = N0.getOperand(0);
24025 EVT IntVT = Int.getValueType();
24027 // The operand to cast should be integer.
24028 if (!IntVT.isInteger() || IntVT.isVector())
24031 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
24032 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
24034 if (N0.getValueType().isVector()) {
24035 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
24036 // 0x7f...) per element and splat it.
// Note: the FP side can be a vector even though the integer side is scalar
// (e.g. v2f32 bitcast from i64); getSplat replicates the per-element mask.
24037 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
24039 SignMask = ~SignMask;
24040 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
24042 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
24043 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
24045 SignMask = ~SignMask;
// AND clears the sign bit (fabs); XOR flips it (fneg).
24048 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
24049 DAG.getConstant(SignMask, DL, IntVT));
24050 AddToWorklist(Int.getNode());
24051 return DAG.getBitcast(VT, Int);
24054 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
24055 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
24056 /// in it. This may be a win when the constant is not otherwise available
24057 /// because it replaces two constant pool loads with one.
24058 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
24059 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
24060 ISD::CondCode CC) {
// Target opt-in gate for this transform.
24061 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
24064 // If we are before legalize types, we want the other legalization to happen
24065 // first (for example, to avoid messing with soft float).
24066 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
24067 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
24068 EVT VT = N2.getValueType();
24069 if (!TV || !FV || !TLI.isTypeLegal(VT))
24072 // If a constant can be materialized without loads, this does not make sense.
24073 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
24074 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
24075 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
24078 // If both constants have multiple uses, then we won't need to do an extra
24079 // load. The values are likely around in registers for other users.
24080 if (!TV->hasOneUse() && !FV->hasOneUse())
// Array layout is {FV, TV}: index 0 holds the false value, so the select
// below maps cond -> offset (true -> element 1, false -> element 0).
24083 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
24084 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
24085 Type *FPTy = Elts[0]->getType();
24086 const DataLayout &TD = DAG.getDataLayout();
24088 // Create a ConstantArray of the two constants.
24089 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
24090 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
24091 TD.getPrefTypeAlign(FPTy));
// The pool may assign a stronger alignment than the preferred one; use it.
24092 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
24094 // Get offsets to the 0 and 1 elements of the array, so we can select between
24096 SDValue Zero = DAG.getIntPtrConstant(0, DL);
24097 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
24098 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
// Re-materialize the comparison, select a byte offset with it, add that to
// the pool address, and load the chosen constant.
24100 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
24101 AddToWorklist(Cond.getNode());
24102 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
24103 AddToWorklist(CstOffset.getNode());
24104 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
24105 AddToWorklist(CPIdx.getNode());
24106 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
24107 MachinePointerInfo::getConstantPool(
24108 DAG.getMachineFunction()), Alignment);
24111 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
24112 /// where 'cond' is the comparison specified by CC.
24113 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
24114 SDValue N2, SDValue N3, ISD::CondCode CC,
24115 bool NotExtCompare) {
24116 // (x ? y : y) -> y.
24117 if (N2 == N3) return N2;
24119 EVT CmpOpVT = N0.getValueType();
24120 EVT CmpResVT = getSetCCResultType(CmpOpVT);
24121 EVT VT = N2.getValueType();
24122 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
24123 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
24124 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
24126 // Determine if the condition we're dealing with is constant.
24127 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
24128 AddToWorklist(SCC.getNode());
24129 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
24130 // fold select_cc true, x, y -> x
24131 // fold select_cc false, x, y -> y
24132 return !(SCCC->isZero()) ? N2 : N3;
24137 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
24140 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
24143 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
24144 // where y is has a single bit set.
24145 // A plaintext description would be, we can turn the SELECT_CC into an AND
24146 // when the condition can be materialized as an all-ones register. Any
24147 // single bit-test can be materialized as an all-ones register with
24148 // shift-left and shift-right-arith.
24149 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
24150 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
24151 SDValue AndLHS = N0->getOperand(0);
24152 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
24153 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
24154 // Shift the tested bit over the sign bit.
24155 const APInt &AndMask = ConstAndRHS->getAPIntValue();
24156 unsigned ShCt = AndMask.getBitWidth() - 1;
24157 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
24159 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
24160 getShiftAmountTy(AndLHS.getValueType()));
24161 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
24163 // Now arithmetic right shift it all the way over, so the result is
24164 // either all-ones, or zero.
24166 DAG.getConstant(ShCt, SDLoc(Shl),
24167 getShiftAmountTy(Shl.getValueType()));
24168 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
24170 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
24175 // fold select C, 16, 0 -> shl C, 4
24176 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
24177 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
24179 if ((Fold || Swap) &&
24180 TLI.getBooleanContents(CmpOpVT) ==
24181 TargetLowering::ZeroOrOneBooleanContent &&
24182 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
24185 CC = ISD::getSetCCInverse(CC, CmpOpVT);
24186 std::swap(N2C, N3C);
24189 // If the caller doesn't want us to simplify this into a zext of a compare,
24191 if (NotExtCompare && N2C->isOne())
24195 // zext (setcc n0, n1)
24197 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
24198 if (VT.bitsLT(SCC.getValueType()))
24199 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
24201 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
24203 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
24204 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
24207 AddToWorklist(SCC.getNode());
24208 AddToWorklist(Temp.getNode());
24213 unsigned ShCt = N2C->getAPIntValue().logBase2();
24214 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
24217 // shl setcc result by log2 n2c
24218 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
24219 DAG.getConstant(ShCt, SDLoc(Temp),
24220 getShiftAmountTy(Temp.getValueType())));
24223 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
24224 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
24225 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
24226 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
24227 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
24228 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
24229 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
24230 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
24231 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
24232 SDValue ValueOnZero = N2;
24233 SDValue Count = N3;
24234 // If the condition is NE instead of E, swap the operands.
24235 if (CC == ISD::SETNE)
24236 std::swap(ValueOnZero, Count);
24237 // Check if the value on zero is a constant equal to the bits in the type.
24238 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
24239 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
24240 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
24241 // legal, combine to just cttz.
24242 if ((Count.getOpcode() == ISD::CTTZ ||
24243 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
24244 N0 == Count.getOperand(0) &&
24245 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
24246 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
24247 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
24248 // legal, combine to just ctlz.
24249 if ((Count.getOpcode() == ISD::CTLZ ||
24250 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
24251 N0 == Count.getOperand(0) &&
24252 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
24253 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
24258 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
24259 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
24260 if (!NotExtCompare && N1C && N2C && N3C &&
24261 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
24262 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
24263 (N1C->isZero() && CC == ISD::SETLT)) &&
24264 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
24265 SDValue ASR = DAG.getNode(
24266 ISD::SRA, DL, CmpOpVT, N0,
24267 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
24268 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
24269 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
24272 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
24274 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
24280 /// This is a stub for TargetLowering::SimplifySetCC.
24281 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
24282 ISD::CondCode Cond, const SDLoc &DL,
24283 bool foldBooleans) {
24284 TargetLowering::DAGCombinerInfo
24285 DagCombineInfo(DAG, Level, false, this);
24286 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
24289 /// Given an ISD::SDIV node expressing a divide by constant, return
24290 /// a DAG expression to select that will generate the same value by multiplying
24291 /// by a magic number.
24292 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
24293 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
24294 // when optimising for minimum size, we don't want to expand a div to a mul
24296 if (DAG.getMachineFunction().getFunction().hasMinSize())
24299 SmallVector<SDNode *, 8> Built;
24300 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
24301 for (SDNode *N : Built)
24309 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
24310 /// DAG expression that will generate the same value by right shifting.
24311 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
24312 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
24316 // Avoid division by zero.
24320 SmallVector<SDNode *, 8> Built;
24321 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
24322 for (SDNode *N : Built)
24330 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
24331 /// expression that will generate the same value by multiplying by a magic
24333 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
24334 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
24335 // when optimising for minimum size, we don't want to expand a div to a mul
24337 if (DAG.getMachineFunction().getFunction().hasMinSize())
24340 SmallVector<SDNode *, 8> Built;
24341 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
24342 for (SDNode *N : Built)
24350 /// Given an ISD::SREM node expressing a remainder by constant power of 2,
24351 /// return a DAG expression that will generate the same value.
24352 SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
24353 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
24357 // Avoid division by zero.
24361 SmallVector<SDNode *, 8> Built;
24362 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
24363 for (SDNode *N : Built)
24371 /// Determines the LogBase2 value for a non-null input value using the
24372 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
24373 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
24374 EVT VT = V.getValueType();
24375 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
24376 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
24377 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
24381 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24382 /// For the reciprocal, we need to find the zero of the function:
24383 /// F(X) = 1/X - A [which has a zero at X = 1/A]
24385 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
24386 /// does not require additional intermediate precision]
24387 /// For the last iteration, put numerator N into it to gain more precision:
24388 /// Result = N X_i + X_i (N - N A X_i)
24389 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
24390 SDNodeFlags Flags) {
24394 // TODO: Handle extended types?
24395 EVT VT = Op.getValueType();
24396 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
24397 VT.getScalarType() != MVT::f64)
24400 // If estimates are explicitly disabled for this function, we're done.
24401 MachineFunction &MF = DAG.getMachineFunction();
24402 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
24403 if (Enabled == TLI.ReciprocalEstimate::Disabled)
24406 // Estimates may be explicitly enabled for this type with a custom number of
24407 // refinement steps.
24408 int Iterations = TLI.getDivRefinementSteps(VT, MF);
24409 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
24410 AddToWorklist(Est.getNode());
24414 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
24416 // Newton iterations: Est = Est + Est (N - Arg * Est)
24417 // If this is the last iteration, also multiply by the numerator.
24418 for (int i = 0; i < Iterations; ++i) {
24419 SDValue MulEst = Est;
24421 if (i == Iterations - 1) {
24422 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
24423 AddToWorklist(MulEst.getNode());
24426 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
24427 AddToWorklist(NewEst.getNode());
24429 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
24430 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
24431 AddToWorklist(NewEst.getNode());
24433 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
24434 AddToWorklist(NewEst.getNode());
24436 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
24437 AddToWorklist(Est.getNode());
24440 // If no iterations are available, multiply with N.
24441 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
24442 AddToWorklist(Est.getNode());
24451 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24452 /// For the reciprocal sqrt, we need to find the zero of the function:
24453 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24455 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
24456 /// As a result, we precompute A/2 prior to the iteration loop.
24457 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
24458 unsigned Iterations,
24459 SDNodeFlags Flags, bool Reciprocal) {
24460 EVT VT = Arg.getValueType();
24462 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
24464 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
24465 // this entire sequence requires only one FP constant.
24466 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
24467 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
24469 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
24470 for (unsigned i = 0; i < Iterations; ++i) {
24471 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
24472 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
24473 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
24474 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
24477 // If non-reciprocal square root is requested, multiply the result by Arg.
24479 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
24484 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24485 /// For the reciprocal sqrt, we need to find the zero of the function:
24486 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24488 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
24489 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
24490 unsigned Iterations,
24491 SDNodeFlags Flags, bool Reciprocal) {
24492 EVT VT = Arg.getValueType();
24494 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
24495 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
24497 // This routine must enter the loop below to work correctly
24498 // when (Reciprocal == false).
24499 assert(Iterations > 0);
24501 // Newton iterations for reciprocal square root:
24502 // E = (E * -0.5) * ((A * E) * E + -3.0)
24503 for (unsigned i = 0; i < Iterations; ++i) {
24504 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
24505 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
24506 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
24508 // When calculating a square root at the last iteration build:
24509 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
24510 // (notice a common subexpression)
24512 if (Reciprocal || (i + 1) < Iterations) {
24513 // RSQRT: LHS = (E * -0.5)
24514 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
24516 // SQRT: LHS = (A * E) * -0.5
24517 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
24520 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
24526 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
24527 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
24528 /// Op can be zero.
24529 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
24534 // TODO: Handle extended types?
24535 EVT VT = Op.getValueType();
24536 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
24537 VT.getScalarType() != MVT::f64)
24540 // If estimates are explicitly disabled for this function, we're done.
24541 MachineFunction &MF = DAG.getMachineFunction();
24542 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
24543 if (Enabled == TLI.ReciprocalEstimate::Disabled)
24546 // Estimates may be explicitly enabled for this type with a custom number of
24547 // refinement steps.
24548 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
24550 bool UseOneConstNR = false;
24552 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
24554 AddToWorklist(Est.getNode());
24557 Est = UseOneConstNR
24558 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
24559 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
24562 // Try the target specific test first.
24563 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
24565 // The estimate is now completely wrong if the input was exactly 0.0 or
24566 // possibly a denormal. Force the answer to 0.0 or value provided by
24567 // target for those cases.
24569 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
24570 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
24578 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24579 return buildSqrtEstimateImpl(Op, Flags, true);
24582 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24583 return buildSqrtEstimateImpl(Op, Flags, false);
24586 /// Return true if there is any possibility that the two addresses overlap.
24587 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
24589 struct MemUseCharacteristics {
24594 Optional<int64_t> NumBytes;
24595 MachineMemOperand *MMO;
24598 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
24599 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
24600 int64_t Offset = 0;
24601 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
24602 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
24603 ? C->getSExtValue()
24604 : (LSN->getAddressingMode() == ISD::PRE_DEC)
24605 ? -1 * C->getSExtValue()
24608 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
24609 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
24610 Offset /*base offset*/,
24611 Optional<int64_t>(Size),
24612 LSN->getMemOperand()};
24614 if (const auto *LN = cast<LifetimeSDNode>(N))
24615 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
24616 (LN->hasOffset()) ? LN->getOffset() : 0,
24617 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
24618 : Optional<int64_t>(),
24619 (MachineMemOperand *)nullptr};
24621 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
24622 (int64_t)0 /*offset*/,
24623 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
24626 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
24627 MUC1 = getCharacteristics(Op1);
24629 // If they are to the same address, then they must be aliases.
24630 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
24631 MUC0.Offset == MUC1.Offset)
24634 // If they are both volatile then they cannot be reordered.
24635 if (MUC0.IsVolatile && MUC1.IsVolatile)
24638 // Be conservative about atomics for the moment
24639 // TODO: This is way overconservative for unordered atomics (see D66309)
24640 if (MUC0.IsAtomic && MUC1.IsAtomic)
24643 if (MUC0.MMO && MUC1.MMO) {
24644 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24645 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24649 // Try to prove that there is aliasing, or that there is no aliasing. Either
24650 // way, we can return now. If nothing can be proved, proceed with more tests.
24652 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
24656 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
24657 // either are not known.
24658 if (!MUC0.MMO || !MUC1.MMO)
24661 // If one operation reads from invariant memory, and the other may store, they
24662 // cannot alias. These should really be checking the equivalent of mayWrite,
24663 // but it only matters for memory nodes other than load /store.
24664 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24665 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24668 // If we know required SrcValue1 and SrcValue2 have relatively large
24669 // alignment compared to the size and offset of the access, we may be able
24670 // to prove they do not alias. This check is conservative for now to catch
24671 // cases created by splitting vector types, it only works when the offsets are
24672 // multiples of the size of the data.
24673 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
24674 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
24675 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
24676 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
24677 auto &Size0 = MUC0.NumBytes;
24678 auto &Size1 = MUC1.NumBytes;
24679 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
24680 Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
24681 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
24682 SrcValOffset1 % *Size1 == 0) {
24683 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
24684 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
24686 // There is no overlap between these relatively aligned accesses of
24687 // similar size. Return no alias.
24688 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
24692 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
24694 : DAG.getSubtarget().useAA();
24696 if (CombinerAAOnlyFunc.getNumOccurrences() &&
24697 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
24701 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
24703 // Use alias analysis information.
24704 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
24705 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
24706 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
24708 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
24709 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
24710 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
24711 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
24715 // Otherwise we have to assume they alias.
24719 /// Walk up chain skipping non-aliasing memory nodes,
24720 /// looking for aliasing nodes and adding them to the Aliases vector.
24721 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
24722 SmallVectorImpl<SDValue> &Aliases) {
24723 SmallVector<SDValue, 8> Chains; // List of chains to visit.
24724 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
24726 // Get alias information for node.
24727 // TODO: relax aliasing for unordered atomics (see D66309)
24728 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
24731 Chains.push_back(OriginalChain);
24732 unsigned Depth = 0;
24734 // Attempt to improve chain by a single step
24735 auto ImproveChain = [&](SDValue &C) -> bool {
24736 switch (C.getOpcode()) {
24737 case ISD::EntryToken:
24738 // No need to mark EntryToken.
24743 // Get alias information for C.
24744 // TODO: Relax aliasing for unordered atomics (see D66309)
24745 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
24746 cast<LSBaseSDNode>(C.getNode())->isSimple();
24747 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
24748 // Look further up the chain.
24749 C = C.getOperand(0);
24752 // Alias, so stop here.
24756 case ISD::CopyFromReg:
24757 // Always forward past past CopyFromReg.
24758 C = C.getOperand(0);
24761 case ISD::LIFETIME_START:
24762 case ISD::LIFETIME_END: {
24763 // We can forward past any lifetime start/end that can be proven not to
24764 // alias the memory access.
24765 if (!mayAlias(N, C.getNode())) {
24766 // Look further up the chain.
24767 C = C.getOperand(0);
24777 // Look at each chain and determine if it is an alias. If so, add it to the
24778 // aliases list. If not, then continue up the chain looking for the next
24780 while (!Chains.empty()) {
24781 SDValue Chain = Chains.pop_back_val();
24783 // Don't bother if we've seen Chain before.
24784 if (!Visited.insert(Chain.getNode()).second)
24787 // For TokenFactor nodes, look at each operand and only continue up the
24788 // chain until we reach the depth limit.
24790 // FIXME: The depth check could be made to return the last non-aliasing
24791 // chain we found before we hit a tokenfactor rather than the original
24793 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
24795 Aliases.push_back(OriginalChain);
24799 if (Chain.getOpcode() == ISD::TokenFactor) {
24800 // We have to check each of the operands of the token factor for "small"
24801 // token factors, so we queue them up. Adding the operands to the queue
24802 // (stack) in reverse order maintains the original order and increases the
24803 // likelihood that getNode will find a matching token factor (CSE.)
24804 if (Chain.getNumOperands() > 16) {
24805 Aliases.push_back(Chain);
24808 for (unsigned n = Chain.getNumOperands(); n;)
24809 Chains.push_back(Chain.getOperand(--n));
24814 if (ImproveChain(Chain)) {
24815 // Updated Chain Found, Consider new chain if one exists.
24816 if (Chain.getNode())
24817 Chains.push_back(Chain);
24821 // No Improved Chain Possible, treat as Alias.
24822 Aliases.push_back(Chain);
24826 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24827 /// (aliasing node.)
24828 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24829 if (OptLevel == CodeGenOpt::None)
24832 // Ops for replacing token factor.
24833 SmallVector<SDValue, 8> Aliases;
24835 // Accumulate all the aliases to this node.
24836 GatherAllAliases(N, OldChain, Aliases);
24838 // If no operands then chain to entry token.
24839 if (Aliases.size() == 0)
24840 return DAG.getEntryNode();
24842 // If a single operand then chain to it. We don't need to revisit it.
24843 if (Aliases.size() == 1)
24846 // Construct a custom tailored token factor.
24847 return DAG.getTokenFactor(SDLoc(N), Aliases);
namespace {

// TODO: Replace with std::monostate when we move to C++17.
// Empty tag type used as the (zero-size) mapped value of the IntervalMap
// below, where only the covered key ranges matter.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // namespace
24857 // This function tries to collect a bunch of potentially interesting
24858 // nodes to improve the chains of, all at once. This might seem
24859 // redundant, as this function gets called when visiting every store
24860 // node, so why not let the work be done on each store as it's visited?
24862 // I believe this is mainly important because mergeConsecutiveStores
24863 // is unable to deal with merging stores of different sizes, so unless
24864 // we improve the chains of all the potential candidates up-front
24865 // before running mergeConsecutiveStores, it might only see some of
24866 // the nodes that will eventually be candidates, and then not be able
24867 // to go from a partially-merged state to the desired final
24868 // fully-merged state.
24870 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24871 SmallVector<StoreSDNode *, 8> ChainedStores;
24872 StoreSDNode *STChain = St;
24873 // Intervals records which offsets from BaseIndex have been covered. In
24874 // the common case, every store writes to the immediately previous address
24875 // space and thus merged with the previous interval at insertion time.
24878 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
24882 // This holds the base pointer, index, and the offset in bytes from the base
24884 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24886 // We must have a base and an offset.
24887 if (!BasePtr.getBase().getNode())
24890 // Do not handle stores to undef base pointers.
24891 if (BasePtr.getBase().isUndef())
24894 // Do not handle stores to opaque types
24895 if (St->getMemoryVT().isZeroSized())
24898 // BaseIndexOffset assumes that offsets are fixed-size, which
24899 // is not valid for scalable vectors where the offsets are
24900 // scaled by `vscale`, so bail out early.
24901 if (St->getMemoryVT().isScalableVector())
24904 // Add ST's interval.
24905 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24907 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24908 if (Chain->getMemoryVT().isScalableVector())
24911 // If the chain has more than one use, then we can't reorder the mem ops.
24912 if (!SDValue(Chain, 0)->hasOneUse())
24914 // TODO: Relax for unordered atomics (see D66309)
24915 if (!Chain->isSimple() || Chain->isIndexed())
24918 // Find the base pointer and offset for this memory node.
24919 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24920 // Check that the base pointer is the same as the original one.
24922 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24924 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24925 // Make sure we don't overlap with other intervals by checking the ones to
24926 // the left or right before inserting.
24927 auto I = Intervals.find(Offset);
24928 // If there's a next interval, we should end before it.
24929 if (I != Intervals.end() && I.start() < (Offset + Length))
24931 // If there's a previous interval, we should start after it.
24932 if (I != Intervals.begin() && (--I).stop() <= Offset)
24934 Intervals.insert(Offset, Offset + Length, Unit);
24936 ChainedStores.push_back(Chain);
24940 // If we didn't find a chained store, exit.
24941 if (ChainedStores.size() == 0)
24944 // Improve all chained stores (St and ChainedStores members) starting from
24945 // where the store chain ended and return single TokenFactor.
24946 SDValue NewChain = STChain->getChain();
24947 SmallVector<SDValue, 8> TFOps;
24948 for (unsigned I = ChainedStores.size(); I;) {
24949 StoreSDNode *S = ChainedStores[--I];
24950 SDValue BetterChain = FindBetterChain(S, NewChain);
24951 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24952 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24953 TFOps.push_back(SDValue(S, 0));
24954 ChainedStores[I] = S;
24957 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24958 SDValue BetterChain = FindBetterChain(St, NewChain);
24960 if (St->isTruncatingStore())
24961 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24962 St->getBasePtr(), St->getMemoryVT(),
24963 St->getMemOperand());
24965 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24966 St->getBasePtr(), St->getMemOperand());
24968 TFOps.push_back(NewST);
24970 // If we improved every element of TFOps, then we've lost the dependence on
24971 // NewChain to successors of St and we need to add it back to TFOps. Do so at
24972 // the beginning to keep relative order consistent with FindBetterChains.
24973 auto hasImprovedChain = [&](SDValue ST) -> bool {
24974 return ST->getOperand(0) != NewChain;
24976 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24978 TFOps.insert(TFOps.begin(), NewChain);
24980 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24983 // Add TF and its operands to the worklist.
24984 AddToWorklist(TF.getNode());
24985 for (const SDValue &Op : TF->ops())
24986 AddToWorklist(Op.getNode());
24987 AddToWorklist(STChain);
24991 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24992 if (OptLevel == CodeGenOpt::None)
24995 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24997 // We must have a base and an offset.
24998 if (!BasePtr.getBase().getNode())
25001 // Do not handle stores to undef base pointers.
25002 if (BasePtr.getBase().isUndef())
25005 // Directly improve a chain of disjoint stores starting at St.
25006 if (parallelizeChainedStores(St))
25009 // Improve St's Chain..
25010 SDValue BetterChain = FindBetterChain(St, St->getChain());
25011 if (St->getChain() != BetterChain) {
25012 replaceStoreChain(St, BetterChain);
25018 /// This is the entry point for the file.
25019 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
25020 CodeGenOpt::Level OptLevel) {
25021 /// This is the main entry point to this class.
25022 DAGCombiner(*this, AA, OptLevel).Run(Level);