From 84c15bc018fa2b64f36790ae06f76c418a5407ad Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Sat, 3 Jul 2021 18:09:19 +0200
Subject: [PATCH] [SCEVExpander] Support opaque pointers

This adds support for opaque pointers to expandAddToGEP() by always
generating an i8 GEP for opaque pointers. After looking at some other
cases (constexpr GEP folding, SROA GEP generation), I've come around to
the idea that we should use i8 GEPs for opaque pointers, because the
alternative would be to guess a GEP type from surrounding code, which
will not be reliable. Ultimately, i8 GEPs are where we want to end up
anyway, and opaque pointers just make that the natural choice.

There are a couple of other places in SCEVExpander that check pointer
element types; I plan to update those when I run across usable test
coverage that doesn't assert elsewhere.
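As a sketch of the intended behavior (illustrative IR with hypothetical
values %p and %scevgep, not taken verbatim from a test): for a typed
pointer the expander can emit a GEP over the pointee type, while for an
opaque pointer it now always emits an i8 GEP with the equivalent byte
offset:

  ; typed pointer: advance %p by one i32 element
  %scevgep = getelementptr i32, i32* %p, i64 1

  ; opaque pointer: the same four-byte offset, expressed as an i8 GEP
  %scevgep = getelementptr i8, ptr %p, i64 4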
Differential Revision: https://reviews.llvm.org/D105398
---
 .../Transforms/Utils/ScalarEvolutionExpander.cpp   | 172 +++++++++++----------
 .../Transforms/LoopStrengthReduce/opaque-ptr.ll    |  36 +++++
 2 files changed, 124 insertions(+), 84 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll

diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index b62b26e..1cf3f97 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -453,8 +453,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                     PointerType *PTy,
                                     Type *Ty,
                                     Value *V) {
-  Type *OriginalElTy = PTy->getElementType();
-  Type *ElTy = OriginalElTy;
   SmallVector<Value *, 4> GepIndices;
   SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
   bool AnyNonZeroIndices = false;
@@ -465,93 +463,97 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
 
   Type *IntIdxTy = DL.getIndexType(PTy);
 
-  // Descend down the pointer's type and attempt to convert the other
-  // operands into GEP indices, at each level. The first index in a GEP
-  // indexes into the array implied by the pointer operand; the rest of
-  // the indices index into the element or field type selected by the
-  // preceding index.
-  for (;;) {
-    // If the scale size is not 0, attempt to factor out a scale for
-    // array indexing.
-    SmallVector<const SCEV *, 8> ScaledOps;
-    if (ElTy->isSized()) {
-      const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
-      if (!ElSize->isZero()) {
-        SmallVector<const SCEV *, 8> NewOps;
-        for (const SCEV *Op : Ops) {
-          const SCEV *Remainder = SE.getConstant(Ty, 0);
-          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
-            // Op now has ElSize factored out.
-            ScaledOps.push_back(Op);
-            if (!Remainder->isZero())
-              NewOps.push_back(Remainder);
-            AnyNonZeroIndices = true;
-          } else {
-            // The operand was not divisible, so add it to the list of operands
-            // we'll scan next iteration.
-            NewOps.push_back(Op);
-          }
-        }
-        // If we made any changes, update Ops.
-        if (!ScaledOps.empty()) {
-          Ops = NewOps;
-          SimplifyAddOperands(Ops, Ty, SE);
-        }
-      }
-    }
-
-    // Record the scaled array index for this level of the type. If
-    // we didn't find any operands that could be factored, tentatively
-    // assume that element zero was selected (since the zero offset
-    // would obviously be folded away).
-    Value *Scaled =
-        ScaledOps.empty()
-            ? Constant::getNullValue(Ty)
-            : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
-    GepIndices.push_back(Scaled);
-
-    // Collect struct field index operands.
-    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
-      bool FoundFieldNo = false;
-      // An empty struct has no fields.
-      if (STy->getNumElements() == 0) break;
-      // Field offsets are known. See if a constant offset falls within any of
-      // the struct fields.
-      if (Ops.empty())
-        break;
-      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
-        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
-          const StructLayout &SL = *DL.getStructLayout(STy);
-          uint64_t FullOffset = C->getValue()->getZExtValue();
-          if (FullOffset < SL.getSizeInBytes()) {
-            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
-            GepIndices.push_back(
-                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
-            ElTy = STy->getTypeAtIndex(ElIdx);
-            Ops[0] =
-                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
-            AnyNonZeroIndices = true;
-            FoundFieldNo = true;
-          }
-        }
-      // If no struct field offsets were found, tentatively assume that
-      // field zero was selected (since the zero offset would obviously
-      // be folded away).
-      if (!FoundFieldNo) {
-        ElTy = STy->getTypeAtIndex(0u);
-        GepIndices.push_back(
-            Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
-      }
-    }
-
-    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
-      ElTy = ATy->getElementType();
-    else
-      // FIXME: Handle VectorType.
-      // E.g., If ElTy is scalable vector, then ElSize is not a compile-time
-      // constant, therefore can not be factored out. The generated IR is less
-      // ideal with base 'V' cast to i8* and do ugly getelementptr over that.
-      break;
+  // For opaque pointers, always generate i8 GEP.
+  if (!PTy->isOpaque()) {
+    // Descend down the pointer's type and attempt to convert the other
+    // operands into GEP indices, at each level. The first index in a GEP
+    // indexes into the array implied by the pointer operand; the rest of
+    // the indices index into the element or field type selected by the
+    // preceding index.
+    Type *ElTy = PTy->getElementType();
+    for (;;) {
+      // If the scale size is not 0, attempt to factor out a scale for
+      // array indexing.
+      SmallVector<const SCEV *, 8> ScaledOps;
+      if (ElTy->isSized()) {
+        const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
+        if (!ElSize->isZero()) {
+          SmallVector<const SCEV *, 8> NewOps;
+          for (const SCEV *Op : Ops) {
+            const SCEV *Remainder = SE.getConstant(Ty, 0);
+            if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+              // Op now has ElSize factored out.
+              ScaledOps.push_back(Op);
+              if (!Remainder->isZero())
+                NewOps.push_back(Remainder);
+              AnyNonZeroIndices = true;
+            } else {
+              // The operand was not divisible, so add it to the list of
+              // operands we'll scan next iteration.
+              NewOps.push_back(Op);
+            }
+          }
+          // If we made any changes, update Ops.
+          if (!ScaledOps.empty()) {
+            Ops = NewOps;
+            SimplifyAddOperands(Ops, Ty, SE);
+          }
+        }
+      }
+
+      // Record the scaled array index for this level of the type. If
+      // we didn't find any operands that could be factored, tentatively
+      // assume that element zero was selected (since the zero offset
+      // would obviously be folded away).
+      Value *Scaled =
+          ScaledOps.empty()
+              ? Constant::getNullValue(Ty)
+              : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
+      GepIndices.push_back(Scaled);
+
+      // Collect struct field index operands.
+      while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+        bool FoundFieldNo = false;
+        // An empty struct has no fields.
+        if (STy->getNumElements() == 0) break;
+        // Field offsets are known. See if a constant offset falls within any of
+        // the struct fields.
+        if (Ops.empty())
+          break;
+        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+            const StructLayout &SL = *DL.getStructLayout(STy);
+            uint64_t FullOffset = C->getValue()->getZExtValue();
+            if (FullOffset < SL.getSizeInBytes()) {
+              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+              GepIndices.push_back(
+                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+              ElTy = STy->getTypeAtIndex(ElIdx);
+              Ops[0] =
+                  SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+              AnyNonZeroIndices = true;
+              FoundFieldNo = true;
+            }
+          }
+        // If no struct field offsets were found, tentatively assume that
+        // field zero was selected (since the zero offset would obviously
+        // be folded away).
+        if (!FoundFieldNo) {
+          ElTy = STy->getTypeAtIndex(0u);
+          GepIndices.push_back(
+              Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+        }
+      }
+
+      if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+        ElTy = ATy->getElementType();
+      else
+        // FIXME: Handle VectorType.
+        // E.g., If ElTy is scalable vector, then ElSize is not a compile-time
+        // constant, therefore can not be factored out. The generated IR is less
+        // ideal with base 'V' cast to i8* and do ugly getelementptr over that.
+        break;
+    }
   }
 
   // If none of the operands were convertible to proper GEP indices, cast
@@ -559,8 +561,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   // better than ptrtoint+arithmetic+inttoptr at least.
   if (!AnyNonZeroIndices) {
     // Cast the base to i8*.
-    V = InsertNoopCastOfTo(V,
-       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+    if (!PTy->isOpaque())
+      V = InsertNoopCastOfTo(V,
+          Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
 
     assert(!isa<Instruction>(V) ||
            SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
@@ -636,7 +639,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   Value *Casted = V;
   if (V->getType() != PTy)
     Casted = InsertNoopCastOfTo(Casted, PTy);
-  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
+  Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices,
+                                 "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll b/llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll
new file mode 100644
index 0000000..14acde9
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-n32:64"
+
+define void @test1(ptr %p.start, i64 %len) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P_START:%.*]], i64 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[LOOP]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ [[LEN:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %p = phi ptr [ %p.start, %entry ], [ %p.next, %loop ]
+  %i.next = add nuw i64 %i, 1
+  %p.next = getelementptr i32, ptr %p, i64 1
+  load volatile i32, ptr %p.next
+  %c = icmp ne i64 %i.next, %len
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
-- 
2.7.4
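Note: assuming a built LLVM tree with this patch applied, the test added
above can be run standalone using the same command as its RUN line:

  opt -S -loop-reduce < llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll |
    FileCheck llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll

The CHECK lines pin down the key behavior: the strength-reduced pointer
increment is emitted as "getelementptr i8, ptr ..., i64 4" rather than as
a typed GEP.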