From 1301a8b473c614f99171728d928b424b56e7ed27 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 Sep 2021 22:21:37 +0200
Subject: [PATCH] [BasicAA] Don't unnecessarily extend pointer size

BasicAA GEP decomposition currently performs all calculation on the
maximum pointer size, but at least 64-bit, with an option to double
the size. The code comment claims that this improves analysis power
when working with uint64_t indices on 32-bit systems. However, I don't
see how this can be, at least while maintaining correctness:

When working on canonical code, the GEP indices will have GEP index
size. If the original code worked on uint64_t with a 32-bit size_t,
then there will be truncs inserted before use as a GEP index. Linear
expression decomposition does not look through truncs, so this will
be an opaque value as far as GEP decomposition is concerned. Working
on a wider pointer size does not help here (or have any effect at all).

When working on non-canonical code (before first InstCombine), the
GEP indices are implicitly truncated to GEP index size. The BasicAA
code currently just ignores this fact completely, and pretends that
this truncation doesn't happen. This is incorrect and will be
addressed by D110977.

I believe that for correctness reasons, it is important to work on
the actual GEP index size to properly model potential overflow.
BasicAA tries to patch over the fact that it uses the wrong size
(see adjustToPointerSize), but it only does that in limited cases
(only for constant values, and not all of them either). I'd like to
move this code towards always working on the correct size, and
dropping these artificial pointer size adjustments is the first step
towards that.

Differential Revision: https://reviews.llvm.org/D110657
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp              | 19 +------------------
 llvm/test/Analysis/BasicAA/gep-and-alias.ll           |  1 -
 .../BasicAA/gep-implicit-trunc-32-bit-pointers.ll     |  3 +--
 3 files changed, 2 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 68f052f..6064bd6 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -68,15 +68,6 @@ using namespace llvm;
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
                                           cl::init(true));
 
-/// By default, even on 32-bit architectures we use 64-bit integers for
-/// calculations. This will allow us to more-aggressively decompose indexing
-/// expressions calculated using i64 values (e.g., long long in C) which is
-/// common enough to worry about.
-static cl::opt<bool> ForceAtLeast64Bits("basic-aa-force-at-least-64b",
-                                        cl::Hidden, cl::init(true));
-static cl::opt<bool> DoubleCalcBits("basic-aa-double-calc-bits",
-                                    cl::Hidden, cl::init(false));
-
 /// SearchLimitReached / SearchTimes shows how often the limit of
 /// to decompose GEPs is reached. It will affect the precision
 /// of basic alias analysis.
@@ -459,14 +450,6 @@ static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) {
   return (Offset << ShiftBits).ashr(ShiftBits);
 }
 
-static unsigned getMaxPointerSize(const DataLayout &DL) {
-  unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
-  if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
-  if (DoubleCalcBits) MaxPointerSize *= 2;
-
-  return MaxPointerSize;
-}
-
 namespace {
 // A linear transformation of a Value; this class represents
 // ZExt(SExt(V, SExtBits), ZExtBits) * Scale.
@@ -547,7 +530,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
   SearchTimes++;
   const Instruction *CxtI = dyn_cast<Instruction>(V);
 
-  unsigned MaxPointerSize = getMaxPointerSize(DL);
+  unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
   DecomposedGEP Decomposed;
   Decomposed.Offset = APInt(MaxPointerSize, 0);
   Decomposed.HasCompileTimeConstantScale = true;
diff --git a/llvm/test/Analysis/BasicAA/gep-and-alias.ll b/llvm/test/Analysis/BasicAA/gep-and-alias.ll
index f99d6da..a70cde7f 100644
--- a/llvm/test/Analysis/BasicAA/gep-and-alias.ll
+++ b/llvm/test/Analysis/BasicAA/gep-and-alias.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -S -basic-aa -gvn < %s | FileCheck %s
-; RUN: opt -S -basic-aa -gvn -basic-aa-force-at-least-64b=0 < %s | FileCheck %s
 
 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.6.0"
diff --git a/llvm/test/Analysis/BasicAA/gep-implicit-trunc-32-bit-pointers.ll b/llvm/test/Analysis/BasicAA/gep-implicit-trunc-32-bit-pointers.ll
index 4b55986..1e9c729 100644
--- a/llvm/test/Analysis/BasicAA/gep-implicit-trunc-32-bit-pointers.ll
+++ b/llvm/test/Analysis/BasicAA/gep-implicit-trunc-32-bit-pointers.ll
@@ -20,10 +20,9 @@ define void @mustalias_overflow_in_32_bit_constants(i8* %ptr) {
   ret void
 }
 
-; FIXME: This should also be MustAlias as in the previous test.
 define void @mustalias_overflow_in_32_with_var_index([1 x i8]* %ptr, i64 %n) {
 ; CHECK-LABEL: Function: mustalias_overflow_in_32_with_var_index
-; CHECK:       NoAlias:  i8* %gep.1, i8* %gep.2
+; CHECK:       MustAlias: i8* %gep.1, i8* %gep.2
 ;
   %gep.1 = getelementptr [1 x i8], [1 x i8]* %ptr, i64 %n, i64 4294967296
   store i8 0, i8* %gep.1
-- 
2.7.4