using CycleInfo = GenericCycleInfo<SSAContext>;
using Cycle = CycleInfo::CycleT;
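+// Note: the legacy wrapper is declared ahead of CycleAnalysis so that the
+// LegacyWrapper typedef below can name it.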
+/// Legacy analysis pass which computes a \ref CycleInfo.
+class CycleInfoWrapperPass : public FunctionPass {
+ Function *F = nullptr;
+ CycleInfo CI;
+
+public:
+ static char ID;
+
+ CycleInfoWrapperPass();
+
+ CycleInfo &getResult() { return CI; }
+ const CycleInfo &getResult() const { return CI; }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis?
+};
+
/// Analysis pass which computes a \ref CycleInfo.
class CycleAnalysis : public AnalysisInfoMixin<CycleAnalysis> {
friend AnalysisInfoMixin<CycleAnalysis>;
/// Provide the result typedef for this analysis pass.
using Result = CycleInfo;
+ using LegacyWrapper = CycleInfoWrapperPass;
+
 /// Run the analysis pass over a function and produce cycle info.
 CycleInfo run(Function &F, FunctionAnalysisManager &);
};

/// Printer pass for the \c CycleInfo.
class CycleInfoPrinterPass : public PassInfoMixin<CycleInfoPrinterPass> {
 raw_ostream &OS;

public:
 explicit CycleInfoPrinterPass(raw_ostream &OS);

 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-/// Legacy analysis pass which computes a \ref CycleInfo.
-class CycleInfoWrapperPass : public FunctionPass {
- Function *F = nullptr;
- CycleInfo CI;
-
-public:
- static char ID;
-
- CycleInfoWrapperPass();
-
- CycleInfo &getCycleInfo() { return CI; }
- const CycleInfo &getCycleInfo() const { return CI; }
-
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void releaseMemory() override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis?
-};
-
} // end namespace llvm
#endif // LLVM_ANALYSIS_CYCLEANALYSIS_H
iterator end() { return IRPositions.end(); }
};
-/// Wrapper for FunctoinAnalysisManager.
+/// Wrapper for FunctionAnalysisManager.
struct AnalysisGetter {
+ // The client may be running the old pass manager, in which case, we need to
+ // map the requested Analysis to its equivalent wrapper in the old pass
+ // manager. The scheme implemented here does not require every Analysis to be
+ // updated. Only those new analyses that the client cares about in the old
+ // pass manager need to expose a LegacyWrapper type, and that wrapper should
+ // support a getResult() method that matches the new Analysis.
+ //
+ // We need SFINAE to check for the LegacyWrapper, but function templates don't
+ // allow partial specialization, which is needed in this case. So instead, we
+ // use a constexpr bool to perform the SFINAE, and then use this information
+ // inside the function template.
+ template <typename, typename = void>
+ static constexpr bool HasLegacyWrapper = false;
+
template <typename Analysis>
typename Analysis::Result *getAnalysis(const Function &F) {
- if (!FAM || !F.getParent())
- return nullptr;
- return &FAM->getResult<Analysis>(const_cast<Function &>(F));
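+ // Prefer the new pass manager; otherwise fall back to the Analysis'
+ // LegacyWrapper under the old pass manager, when one is exposed.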
+ if (FAM)
+ return &FAM->getResult<Analysis>(const_cast<Function &>(F));
+ if constexpr (HasLegacyWrapper<Analysis>)
+ if (LegacyPass)
+ return &LegacyPass
+ ->getAnalysis<typename Analysis::LegacyWrapper>(
+ const_cast<Function &>(F))
+ .getResult();
+ return nullptr;
}
AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
+ AnalysisGetter(Pass *P) : LegacyPass(P) {}
AnalysisGetter() = default;
private:
FunctionAnalysisManager *FAM = nullptr;
+ Pass *LegacyPass = nullptr;
};
+template <typename Analysis>
+constexpr bool AnalysisGetter::HasLegacyWrapper<
+ Analysis, std::void_t<typename Analysis::LegacyWrapper>> = true;
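+
+// Illustrative sketch (hypothetical MyAnalysis / MyInfoWrapperPass, not part
+// of this patch): a new-PM analysis opts into the legacy fallback by exposing
+// a LegacyWrapper typedef, and the named wrapper pass must provide a
+// getResult() that matches the new Analysis:
+//
+//   class MyAnalysis : public AnalysisInfoMixin<MyAnalysis> {
+//   public:
+//     using Result = MyInfo;
+//     using LegacyWrapper = MyInfoWrapperPass; // getResult() returns MyInfo&
+//     Result run(Function &F, FunctionAnalysisManager &AM);
+//   };
+//
+//   // Inside a legacy pass that declared MyInfoWrapperPass as required:
+//   AnalysisGetter AG(this);
+//   MyInfo *Info = AG.getAnalysis<MyAnalysis>(F);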
+
/// Data structure to hold cached (LLVM-IR) information.
///
/// All attributes are given an InformationCache object at creation time to
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#define DEBUG_TYPE "amdgpu-attributor"
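+
+// Declare the wrapper's initializer so that INITIALIZE_PASS_DEPENDENCY below
+// can reference it without including InitializePasses.h.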
+namespace llvm {
+void initializeCycleInfoWrapperPassPass(PassRegistry &);
+}
+
using namespace llvm;
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
bool runOnModule(Module &M) override {
SetVector<Function *> Functions;
- AnalysisGetter AG;
+ AnalysisGetter AG(this);
for (Function &F : M) {
if (!F.isIntrinsic())
Functions.insert(&F);
return Change == ChangeStatus::CHANGED;
}
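+
+ // CycleInfo is fetched through AnalysisGetter(this); the legacy pass
+ // manager only makes it available if it is declared as required here.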
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<CycleInfoWrapperPass>();
+ }
+
StringRef getPassName() const override { return "AMDGPU Attributor"; }
TargetMachine *TM;
static char ID;
char AMDGPUAttributor::ID = 0;
Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
-INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
+INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
+ false)
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
return true;
};
+ const auto *F = getAnchorScope();
+ const auto *CI =
+ F ? A.getInfoCache().getAnalysisResultForFunction<CycleAnalysis>(*F)
+ : nullptr;
const auto *TLI =
- getAnchorScope()
- ? A.getInfoCache().getTargetLibraryInfoForFunction(*getAnchorScope())
- : nullptr;
+ F ? A.getInfoCache().getTargetLibraryInfoForFunction(*F) : nullptr;
+
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Value *CurPtr = U.get();
User *Usr = U.getUser();
return true;
}
- // Check if the PHI operand is not dependent on the PHI itself.
+ // Check if the PHI operand can be traced back to AssociatedValue.
APInt Offset(
DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
0);
Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
auto It = OffsetInfoMap.find(CurPtrBase);
- if (It != OffsetInfoMap.end()) {
+ if (It == OffsetInfoMap.end()) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+ << *CurPtr << " in " << *Usr << "\n");
+ UsrOI.setUnknown();
+ Follow = true;
+ return true;
+ }
+
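+ // Conservatively decide whether I might be a PHI in the header of some
+ // cycle: assume true when no CycleInfo is available, and false when I is
+ // not inside any cycle at all.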
+ auto mayBeInCycleHeader = [](const CycleInfo *CI, const Instruction *I) {
+ if (!CI)
+ return true;
+ auto *BB = I->getParent();
+ auto *C = CI->getCycle(BB);
+ if (!C)
+ return false;
+ return BB == C->getHeader();
+ };
+
+ // Check if the PHI operand is not dependent on the PHI itself. Every
+ // recurrence is a cyclic net of PHIs in the data flow, and has an
+ // equivalent Cycle in the control flow. One of those PHIs must be in the
+ // header of that control flow Cycle. This is independent of the choice of
+ // Cycles reported by CycleInfo. It is sufficient to check the PHIs in
+ // every Cycle header; if such a node is marked unknown, this will
+ // eventually propagate through the whole net of PHIs in the recurrence.
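+ // Illustrative example (not from this patch): in
+ //   loop:
+ //     %p = phi i8* [ %base, %entry ], [ %p.next, %loop ]
+ //     %p.next = getelementptr i8, i8* %p, i64 4
+ //     br i1 %c, label %loop, label %exit
+ // the PHI %p sits in the header of the cycle {loop}, so marking it unknown
+ // here eventually reaches every PHI in the recurrence.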
+ if (mayBeInCycleHeader(CI, cast<Instruction>(Usr))) {
auto BaseOI = It->getSecond();
BaseOI.addToAll(Offset.getZExtValue());
if (IsFirstPHIUser || BaseOI == UsrOI) {
<< " in " << *Usr << "\n");
return HandlePassthroughUser(Usr, PtrOI, Follow);
}
+
LLVM_DEBUG(
dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch "
<< *CurPtr << " in " << *Usr << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
- << *CurPtr << " in " << *Usr << "\n");
+ UsrOI.setUnknown();
+ Follow = true;
+ return true;
}
- // TODO: Approximate in case we know the direction of the recurrence.
- UsrOI.setUnknown();
+ UsrOI.merge(PtrOI);
Follow = true;
return true;
}
ret void
}
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-NOT: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_3
+
+define amdgpu_kernel void @kernel_3(i32 addrspace(1)* %a, i1 %cond) {
+entry:
+ %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ br i1 %cond, label %old, label %new
+
+old: ; preds = %entry
+ %tmp4 = getelementptr i8, i8 addrspace(4)* %tmp7, i64 12
+ br label %join
+
+new: ; preds = %entry
+ %tmp12 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 18
+ br label %join
+
+join: ; preds = %new, %old
+ %.in.in.in = phi i8 addrspace(4)* [ %tmp12, %new ], [ %tmp4, %old ]
+ %.in.in = bitcast i8 addrspace(4)* %.in.in.in to i16 addrspace(4)*
+
+ ;;; THIS USE of implicitarg_ptr should not produce hostcall metadata
+ %.in = load i16, i16 addrspace(4)* %.in.in, align 2
+
+ %idx.ext = sext i16 %.in to i64
+ %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+ %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O0-NEXT: Expand reduction intrinsics
; GCN-O0-NEXT: AMDGPU Attributor
+; GCN-O0-NEXT: FunctionPass Manager
+; GCN-O0-NEXT: Cycle Info Analysis
; GCN-O0-NEXT: CallGraph Construction
; GCN-O0-NEXT: Call Graph SCC Pass Manager
; GCN-O0-NEXT: AMDGPU Annotate Kernel Features
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: TLS Variable Hoist
; GCN-O1-NEXT: AMDGPU Attributor
+; GCN-O1-NEXT: FunctionPass Manager
+; GCN-O1-NEXT: Cycle Info Analysis
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Call Graph SCC Pass Manager
; GCN-O1-NEXT: AMDGPU Annotate Kernel Features
; GCN-O1-OPTS-NEXT: TLS Variable Hoist
; GCN-O1-OPTS-NEXT: Early CSE
; GCN-O1-OPTS-NEXT: AMDGPU Attributor
+; GCN-O1-OPTS-NEXT: FunctionPass Manager
+; GCN-O1-OPTS-NEXT: Cycle Info Analysis
; GCN-O1-OPTS-NEXT: CallGraph Construction
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
; GCN-O1-OPTS-NEXT: AMDGPU Annotate Kernel Features
; GCN-O2-NEXT: TLS Variable Hoist
; GCN-O2-NEXT: Early CSE
; GCN-O2-NEXT: AMDGPU Attributor
+; GCN-O2-NEXT: FunctionPass Manager
+; GCN-O2-NEXT: Cycle Info Analysis
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Call Graph SCC Pass Manager
; GCN-O2-NEXT: AMDGPU Annotate Kernel Features
; GCN-O3-NEXT: Optimization Remark Emitter
; GCN-O3-NEXT: Global Value Numbering
; GCN-O3-NEXT: AMDGPU Attributor
+; GCN-O3-NEXT: FunctionPass Manager
+; GCN-O3-NEXT: Cycle Info Analysis
; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Call Graph SCC Pass Manager
; GCN-O3-NEXT: AMDGPU Annotate Kernel Features
ret i8 %i
}
+; FIXME: The whole function is just "ret i8 21".
+
+define i8 @phi_gep_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
+; CHECK-LABEL: define {{[^@]+}}@phi_gep_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: store i8 21, i8* [[GEP23]], align 4
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: store i8 21, i8* [[GEP31]], align 4
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i8* [ [[GEP23]], [[THEN]] ], [ [[GEP31]], [[ELSE]] ]
+; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[PHI_PTR]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ br i1 %cnd1, label %then, label %else
+
+then:
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ store i8 21, i8* %gep23, align 4
+ br label %join
+
+else:
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ store i8 21, i8* %gep31, align 4
+ br label %join
+
+join:
+ %phi.ptr = phi i8* [%gep23, %then], [%gep31, %else]
+ ;; This store is eliminated
+ %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+ store i8 42, i8* %gep29, align 4
+ %i = load i8, i8* %phi.ptr, align 4
+ ret i8 %i
+}
+
+; FIXME: The whole function is just "ret i8 42".
+
+define i8 @phi_gep_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@phi_gep_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i8* [ [[GEP23]], [[THEN]] ], [ [[GEP31]], [[ELSE]] ]
+; CHECK-NEXT: store i8 21, i8* [[PHI_PTR]], align 4
+; CHECK-NEXT: ret i8 42
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+ ;; This store is propagated to the load.
+ store i8 42, i8* %gep29, align 4
+ br i1 %cnd1, label %then, label %else
+
+then:
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ br label %join
+
+else:
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ br label %join
+
+join:
+ %phi.ptr = phi i8* [%gep23, %then], [%gep31, %else]
+ store i8 21, i8* %phi.ptr, align 4
+ ;; Replaced with the constant; both the store and the load are eliminated.
+ %i = load i8, i8* %gep29, align 4
+ ret i8 %i
+}
+
+define i8 @phi_gep_not_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
+; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i8* [ [[GEP23]], [[THEN]] ], [ [[GEP31]], [[ELSE]] ]
+; CHECK-NEXT: store i8 42, i8* [[GEP23]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[PHI_PTR]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ br i1 %cnd1, label %then, label %else
+
+then:
+ br label %join
+
+else:
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ br label %join
+
+join:
+ %phi.ptr = phi i8* [%gep23, %then], [%gep31, %else]
+ ;; This store cannot be eliminated
+ store i8 42, i8* %gep23, align 4
+ %i = load i8, i8* %phi.ptr, align 4
+ ret i8 %i
+}
+
+define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
+; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i8* [ [[GEP23]], [[THEN]] ], [ [[GEP31]], [[ELSE]] ]
+; CHECK-NEXT: store i8 21, i8* [[PHI_PTR]], align 4
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP23]], align 4
+; CHECK-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+ br i1 %cnd1, label %then, label %else
+
+then:
+ br label %join
+
+else:
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+ br label %join
+
+join:
+ %phi.ptr = phi i8* [%gep23, %then], [%gep31, %else]
+ store i8 21, i8* %phi.ptr, align 4
+ %i = load i8, i8* %gep23, align 4
+ ret i8 %i
+}
+
; FIXME: This should be simplifiable. See comment inside.
define i8 @phi_offsets_fixme(i1 %cnd1, i1 %cnd2) {
; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) }
; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn }
;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}