}
// Recursively generate a sequence for materializing an integer.
-static RISCVMatInt::InstSeq
-generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
- RISCVMatInt::InstSeq Res;
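+// The sequence is accumulated in the caller-provided Res, so recursive calls
+// append in place instead of returning an InstSeq by value each time.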
+static void generateInstSeqImpl(int64_t Val,
+ const FeatureBitset &ActiveFeatures,
+ RISCVMatInt::InstSeq &Res) {
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
if (isInt<32>(Val)) {
unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
Res.emplace_back(AddiOpc, Lo12);
}
- return Res;
+ return;
}
assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
// Use BSETI for a single bit.
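+ // (e.g. 1ULL << 44 becomes a single BSETI 44 instead of ADDI 1 + SLLI 44.)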
if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
Res.emplace_back(RISCV::BSETI, Log2_64(Val));
- return Res;
+ return;
}
// In the worst case, for a full 64-bit constant, a sequence of 8 instructions
// (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted.
}
}
- Res = generateInstSeqImpl(Val, ActiveFeatures);
+ generateInstSeqImpl(Val, ActiveFeatures, Res);
// Skip shift if we were able to use LUI directly.
if (ShiftAmount) {
if (Lo12)
Res.emplace_back(RISCV::ADDI, Lo12);
-
- return Res;
}
static unsigned extractRotateInfo(int64_t Val) {
namespace llvm::RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
- RISCVMatInt::InstSeq Res = generateInstSeqImpl(Val, ActiveFeatures);
+ RISCVMatInt::InstSeq Res;
+ generateInstSeqImpl(Val, ActiveFeatures, Res);
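+ // Res now holds the default expansion; each optimization below only
+ // replaces it with a strictly shorter sequence.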
// If the low 12 bits are non-zero, the first expansion may end with an ADDI
// or ADDIW. If there are trailing zeros, try generating a sign extended
// constant with no trailing zeros and use a final SLLI to restore them.
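+ // (A shifted constant that fits in 6 bits compresses to C.LI+C.SLLI, which
+ // is preferred unless the target fuses LUI+ADDI.)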
bool IsShiftedCompressible =
isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
- auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
// Keep the new sequence if it is an improvement.
"Expected RV32 to only need 2 instructions");
unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined);
uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
- {
- // Fill in the bits that will be shifted out with 1s. An example where
- // this helps is trailing one masks with 32 or more ones. This will
- // generate ADDI -1 and an SRLI.
- ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
- auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
- // Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
- Res = TmpSeq;
- }
+ // Fill in the bits that will be shifted out with 1s. An example where this
+ // helps is trailing one masks with 32 or more ones. This will generate
+ // ADDI -1 and an SRLI.
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ // Keep the new sequence if it is an improvement.
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
- {
- // Some cases can benefit from filling the lower bits with zeros instead.
- ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
- auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ // Some cases can benefit from filling the lower bits with zeros instead.
+ ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
- // Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
- Res = TmpSeq;
- }
+ // Keep the new sequence if it is an improvement.
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
// If we have exactly 32 leading zeros and Zba, we can try using zext.w at
// the end of the sequence.
if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
// Try replacing upper bits with 1.
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
- auto TmpSeq = generateInstSeqImpl(LeadingOnesVal, ActiveFeatures);
+ TmpSeq.clear();
+ generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
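+ // ADD_UW with x0 (zext.w) clears bits 63:32 again, turning LeadingOnesVal
+ // back into Val.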
TmpSeq.emplace_back(RISCV::ADD_UW, 0);
// Keep the new sequence if it is an improvement.
NewVal = Val & ~0x80000000ll;
}
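+ // If flipping bit 31 turned Val into a sign-extendable 32-bit constant,
+ // materialize NewVal and flip the bit back with a single BCLRI/BSETI.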
if (isInt<32>(NewVal)) {
- auto TmpSeq = generateInstSeqImpl(NewVal, ActiveFeatures);
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
TmpSeq.emplace_back(Opc, 31);
if (TmpSeq.size() < Res.size())
Res = TmpSeq;
int32_t Lo = Lo_32(Val);
uint32_t Hi = Hi_32(Val);
Opc = 0;
- auto TmpSeq = generateInstSeqImpl(Lo, ActiveFeatures);
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
// Check if it is profitable to use BCLRI/BSETI.
if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
Opc = RISCV::BSETI;
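+ // A positive Lo sign-extends with a zero upper half, so each remaining set
+ // bit of Hi costs exactly one BSETI.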
"Expected RV32 to only need 2 instructions");
int64_t Div = 0;
unsigned Opc = 0;
+ RISCVMatInt::InstSeq TmpSeq;
// Select the opcode and divisor.
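+ // (sh1add x, x = 3*x; sh2add x, x = 5*x; sh3add x, x = 9*x.)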
if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
Div = 3;
}
// Build the new instruction sequence.
if (Div > 0) {
- auto TmpSeq = generateInstSeqImpl(Val / Div, ActiveFeatures);
+ generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
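+ // The trailing SH*ADD of the result with itself multiplies Val / Div back
+ // up to Val.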
TmpSeq.emplace_back(Opc, 0);
if (TmpSeq.size() < Res.size())
Res = TmpSeq;
// already been processed to LUI+SH*ADD by previous optimization.
assert(Lo12 != 0 &&
"unexpected instruction sequence for immediate materialisation");
- auto TmpSeq = generateInstSeqImpl(Hi52 / Div, ActiveFeatures);
+ assert(TmpSeq.empty() && "Expected empty TmpSeq");
+ generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
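+ // SH*ADD rebuilds Hi52 from Hi52 / Div; the final ADDI adds the low 12 bits.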
TmpSeq.emplace_back(Opc, 0);
TmpSeq.emplace_back(RISCV::ADDI, Lo12);
if (TmpSeq.size() < Res.size())