From 6e6966460a387724cd87ec92d3a91ecfbee84a24 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Thu, 7 Apr 2016 15:45:02 +0000
Subject: [PATCH] [GVN] Fix handling of sub-byte types in big-endian mode

When GVN wants to re-interpret an already available value in a smaller
type, it needs to right-shift the value on big-endian systems to ensure
the correct bytes are accessed.  The shift value is the difference of
the sizes of the two types.

This is correct as long as both types occupy multiples of full bytes.
However, when one of them is a sub-byte type like i1, this no longer
holds true: we still need to shift, but only to access the correct
*byte*.  Accessing bits within the byte requires no shift in either
endianness; e.g. an i1 resides in the least-significant bit of its
containing byte on both big- and little-endian systems.

Therefore, the appropriate shift value to be used is the difference of
the *storage* sizes of the two types.  This is already handled correctly
in one place where such a shift takes place (GetStoreValueForLoad), but
is incorrect in two other places: GetLoadValueForLoad and
CoerceAvailableValueToLoadType.

This patch changes both places to use the storage size as well.

Differential Revision: http://reviews.llvm.org/D18662

llvm-svn: 265684
---
 llvm/lib/Transforms/Scalar/GVN.cpp     |  7 +++---
 llvm/test/Transforms/GVN/big-endian.ll | 40 ++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/big-endian.ll

diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index f286eb2..121098f 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -775,7 +775,9 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
   // If this is a big-endian system, we need to shift the value down to the low
   // bits so that a truncate will work.
   if (DL.isBigEndian()) {
-    StoredVal = IRB.CreateLShr(StoredVal, StoreSize - LoadSize, "tmp");
+    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+                        DL.getTypeStoreSizeInBits(LoadedTy);
+    StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
   }
 
   // Truncate the integer to the right size now.
@@ -1056,8 +1058,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
     // system, we need to shift down to get the relevant bits.
     Value *RV = NewLoad;
     if (DL.isBigEndian())
-      RV = Builder.CreateLShr(RV,
-                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+      RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValSize) * 8);
     RV = Builder.CreateTrunc(RV, SrcVal->getType());
     SrcVal->replaceAllUsesWith(RV);
 
diff --git a/llvm/test/Transforms/GVN/big-endian.ll b/llvm/test/Transforms/GVN/big-endian.ll
new file mode 100644
index 0000000..faaf3ef
--- /dev/null
+++ b/llvm/test/Transforms/GVN/big-endian.ll
@@ -0,0 +1,40 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"                                                                                         
+target triple = "powerpc64-unknown-linux-gnu"                                                                                     
+
+;; Make sure we use correct bit shift based on storage size for
+;; loads reusing a load value.
+define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) {
+; CHECK-LABEL: @test1
+; CHECK: [[V1:%.*]] = load i16, i16* %{{.*}}
+; CHECK: [[V2:%.*]] = lshr i16 [[V1]], 8
+; CHECK: trunc i16 [[V2]] to i1
+
+  %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0
+  %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8
+  %valueLoadB.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 0
+  %valueLoadB.fca.0.load = load i1, i1* %valueLoadB.fca.0.gep, align 8
+  %isTrue = and i1 %valueLoadA.fca.0.load, %valueLoadB.fca.0.load
+  %valueLoadA.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 1
+  %valueLoadA.fca.1.load = load i8, i8* %valueLoadA.fca.1.gep, align 1
+  %isNotNullA = icmp ne i8 %valueLoadA.fca.1.load, 0
+  %valueLoadB.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 1
+  %valueLoadB.fca.1.load = load i8, i8* %valueLoadB.fca.1.gep, align 1
+  %isNotNullB = icmp ne i8 %valueLoadB.fca.1.load, 0
+  %isNotNull = and i1 %isNotNullA, %isNotNullB
+  %isTrueAndNotNull = and i1 %isTrue, %isNotNull
+  %ret = zext i1 %isTrueAndNotNull to i64
+  ret i64 %ret
+}
+
+;; And likewise for loads reusing a store value.
+define i1 @test2(i8 %V, i8* %P) {
+; CHECK-LABEL: @test2
+; CHECK-NOT: lshr
+  store i8 %V, i8* %P
+  %P2 = bitcast i8* %P to i1*
+  %A = load i1, i1* %P2
+  ret i1 %A
+}
+
-- 
2.7.4