From 597946a4dd2b3ba213f08a18285ab4362f873aa8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 29 Apr 2022 17:22:54 +0200
Subject: [PATCH] [ConstantFold] Don't convert getelementptr to
 ptrtoint+inttoptr

ConstantFolding currently converts "getelementptr i8, Ptr, (sub 0, V)"
to "inttoptr (sub (ptrtoint Ptr), V)". This transform is, taken by
itself, correct, but does came with two issues:

1. It unnecessarily broadens provenance by introducing an inttoptr.
   We generally prefer not to introduce inttoptr during optimization.
2. For the case where V == ptrtoint Ptr, this folds to inttoptr 0,
   which further folds to null. In that case provenance becomes
   incorrect. This has been observed as a real-world miscompile with
   rustc.

We should probably address that incorrect inttoptr 0 fold at some
point, but in either case we should also drop this inttoptr-introducing
fold. Instead, replace it with a fold rooted at
ptrtoint(getelementptr), which seems to cover the original
motivation for this fold (test2 in the changed file).

Differential Revision: https://reviews.llvm.org/D124677
---
 llvm/lib/Analysis/ConstantFolding.cpp              | 28 ++++++++++------------
 .../Transforms/InstCombine/constant-fold-gep.ll    |  8 +++----
 2 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 299ea33..3aa06a6 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -866,21 +866,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
 
   Type *IntIdxTy = DL.getIndexType(Ptr->getType());
 
-  // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
-  // "inttoptr (sub (ptrtoint Ptr), V)"
-  if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
-    auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
-    assert((!CE || CE->getType() == IntIdxTy) &&
-           "CastGEPIndices didn't canonicalize index types!");
-    if (CE && CE->getOpcode() == Instruction::Sub &&
-        CE->getOperand(0)->isNullValue()) {
-      Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
-      Res = ConstantExpr::getSub(Res, CE->getOperand(1));
-      Res = ConstantExpr::getIntToPtr(Res, ResTy);
-      return ConstantFoldConstant(Res, DL, TLI);
-    }
-  }
-
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     if (!isa<ConstantInt>(Ops[i]))
       return nullptr;
@@ -1336,6 +1321,19 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
             DL, BaseOffset, /*AllowNonInbounds=*/true));
         if (Base->isNullValue()) {
           FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
+        } else {
+          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
+          if (GEP->getNumIndices() == 1 &&
+              GEP->getSourceElementType()->isIntegerTy(8)) {
+            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
+            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
+            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
+            if (Sub && Sub->getType() == IntIdxTy &&
+                Sub->getOpcode() == Instruction::Sub &&
+                Sub->getOperand(0)->isNullValue())
+              FoldedValue = ConstantExpr::getSub(
+                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
+          }
         }
       }
       if (FoldedValue) {
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
index 928409a..ba38615 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -126,7 +126,7 @@ declare void @use.ptr(i8*)
 
 define i8* @gep_sub_self() {
 ; CHECK-LABEL: @gep_sub_self(
-; CHECK-NEXT:    ret i8* null
+; CHECK-NEXT:    ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64)))
 ;
   %p.int = ptrtoint i8* @g to i64
   %p.int.neg = sub i64 0, %p.int
@@ -136,7 +136,7 @@ define i8* @gep_sub_self() {
 
 define i8* @gep_sub_self_plus_addr(i64 %addr) {
 ; CHECK-LABEL: @gep_sub_self_plus_addr(
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR:%.*]]
 ; CHECK-NEXT:    ret i8* [[P2]]
 ;
   %p.int = ptrtoint i8* @g to i64
@@ -164,7 +164,7 @@ define i8* @gep_plus_addr_sub_self_in_loop() {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[ADDR:%.*]] = call i64 @get.i64()
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR]]
 ; CHECK-NEXT:    call void @use.ptr(i8* [[P2]])
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
@@ -182,7 +182,7 @@ loop:
 
 define i8* @gep_sub_other() {
 ; CHECK-LABEL: @gep_sub_other(
-; CHECK-NEXT:    ret i8* inttoptr (i64 sub (i64 ptrtoint (i8* @g to i64), i64 ptrtoint (i8* @g2 to i64)) to i8*)
+; CHECK-NEXT:    ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g2 to i64)))
 ;
   %p.int = ptrtoint i8* @g2 to i64
   %p.int.neg = sub i64 0, %p.int
-- 
2.7.4