From 4f2fd3818b0eb26806f366bc37369349aeedcaf9 Mon Sep 17 00:00:00 2001
From: Hyeongyu Kim <gusrb406@snu.ac.kr>
Date: Mon, 31 May 2021 14:05:29 +0900
Subject: [PATCH] [InstCombine] Fix miscompile on GEP+load to icmp fold
 (PR45210)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

As noted in PR45210: https://bugs.llvm.org/show_bug.cgi?id=45210
...the bug is triggered as Eli say when sext(idx) * ElementSize overflows.

```
   // assume that GV is an array of 4-byte elements
   GEP = gep GV, 0, Idx // this is accessing Idx * 4
   L = load GEP
Â  Â ICI = icmp eq L, value
Â =>
Â  Â ICI = icmp eq Idx, NewIdx
```

The foldCmpLoadFromIndexedGlobal function simplifies GEP+load operation to icmp.
And there is a problem because Idx * ElementSize can overflow.

Let's assume that the wanted value is at offset 0.
Then, there are actually four possible values for Idx to match offset 0: 0x00..00, 0x40..00, 0x80..00, 0xC0..00.
We should return true for all these values, but currently, the new icmp only returns true for 0x00..00.

This problem can be solved by masking off (trailing zeros of ElementSize) bits from Idx.

```
   ...
Â =>
   Idx' = and Idx, 0x3F..FF
Â  Â ICI = icmp eq Idx', NewIdx
```

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D99481
---
 .../Transforms/InstCombine/InstCombineCompares.cpp | 22 +++++++++++++++---
 llvm/test/Transforms/InstCombine/load-cmp.ll       | 26 ++++++++++++++--------
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9e4d888..5e60edd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -269,14 +269,30 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
-  // If the index is larger than the pointer size of the target, truncate the
-  // index down like the GEP would do implicitly.  We don't have to do this for
-  // an inbounds GEP because the index can't be out of range.
   if (!GEP->isInBounds()) {
+    // If the index is larger than the pointer size of the target, truncate the
+    // index down like the GEP would do implicitly.  We don't have to do this
+    // for an inbounds GEP because the index can't be out of range.
     Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
     if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize)
       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
+
+    unsigned ElementSize =
+        DL.getTypeAllocSize(Init->getType()->getArrayElementType());
+
+    // If inbounds keyword is not present, Idx * ElementSize can overflow.
+    // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+    // Then, there are two possible values for Idx to match offset 0:
+    // 0x00..00, 0x80..00.
+    // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
+    // comparison is false if Idx was 0x80..00.
+    // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
+    if (countTrailingZeros(ElementSize) != 0) {
+      Value *Mask = ConstantInt::getSigned(Idx->getType(), -1);
+      Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
+      Idx = Builder.CreateAnd(Idx, Mask);
+    }
   }
 
   // If the comparison is only true for one or two elements, emit direct
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 84f692d..b56cfdd 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -33,7 +33,8 @@ define i1 @test1(i32 %X) {
 
 define i1 @test1_noinbounds(i32 %X) {
 ; CHECK-LABEL: @test1_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
@@ -44,8 +45,8 @@ define i1 @test1_noinbounds(i32 %X) {
 
 define i1 @test1_noinbounds_i64(i64 %X) {
 ; CHECK-LABEL: @test1_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
@@ -54,10 +55,14 @@ define i1 @test1_noinbounds_i64(i64 %X) {
   ret i1 %R
 }
 
+; When x is 0x8009, x * 2(ElementSize) overflows and it becomes 0x0012.
+; It is the same location as when x is 0x0009, So we need to return
+; true if x AND 0x7FFF is 0x0009.
+
 define i1 @test1_noinbounds_as1(i32 %x) {
 ; CHECK-LABEL: @test1_noinbounds_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i16 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 32767
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 9
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
@@ -260,7 +265,8 @@ define i1 @test10_struct_arr(i32 %x) {
 
 define i1 @test10_struct_arr_noinbounds(i32 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
@@ -294,7 +300,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP2]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
@@ -305,8 +313,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
 
 define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
 ; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
-- 
2.7.4