From: Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Date: Thu, 4 Aug 2022 10:16:44 +0000 (+0200)
Subject: [SystemZ] Improve handling of vector alignments.
X-Git-Tag: upstream/17.0.6~34063
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=de0e3117d40af95993b11d0622f9700415552d48;p=platform%2Fupstream%2Fllvm.git

[SystemZ] Improve handling of vector alignments.

Make the DataLayout string always hold a vector alignment of 8 bytes,
regardless of the vector ABI. This makes the DataLayout depend only on the
target triple, which is the general expectation (in assertions).

On older architectures where vectors use the natural alignment (16 bytes),
the front end will maintain the same behavior and produce an overalignment
compared to the datalayout.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D131158
---

diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 7def024..371eb25 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -51,13 +51,13 @@ public:
       // All vector types are default aligned on an 8-byte boundary, even if the
       // vector facility is not available. That is different from Linux.
       MaxVectorAlign = 64;
-      // Compared to Linux/ELF, the data layout differs only in some details:
-      // - name mangling is GOFF
-      // - 128 bit vector types are 64 bit aligned
+      // Compared to Linux/ELF, the data layout differs only in that name
+      // mangling is GOFF.
       resetDataLayout(
           "E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
     } else
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
+      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+                      "-v128:64-a:8:16-n32:64");
     MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
     HasStrictFP = true;
   }
@@ -171,12 +171,14 @@ public:
     }
     HasVector &= !SoftFloat;
 
-    // If we use the vector ABI, vector types are 64-bit aligned.
-    if (HasVector && !getTriple().isOSzOS()) {
+    // If we use the vector ABI, vector types are 64-bit aligned. The
+    // DataLayout string is always set to this alignment as it is not a
+    // requirement that it follows the alignment emitted by the front end. It
+    // is assumed generally that the DataLayout should reflect only the
+    // target triple and not any specific feature.
+    if (HasVector && !getTriple().isOSzOS())
       MaxVectorAlign = 64;
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
-                      "-v128:64-a:8:16-n32:64");
-    }
+
     return true;
   }
 
diff --git a/clang/test/CodeGen/SystemZ/align-systemz-02.c b/clang/test/CodeGen/SystemZ/align-systemz-02.c
new file mode 100644
index 0000000..cc09a90
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/align-systemz-02.c
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=VECIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=VECASM
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=SCALIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=SCALASM
+
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+
+volatile vec_sint GlobVsi;
+
+struct S {
+  int A;
+  vec_sint Vsi;
+} GlobS;
+
+void fun() {
+  GlobS.Vsi = GlobVsi;
+}
+
+// VECIR: %struct.S = type { i32, <4 x i32> }
+// VECIR: @GlobVsi = global <4 x i32> zeroinitializer, align 8
+// VECIR: @GlobS = global %struct.S zeroinitializer, align 8
+// VECIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 8
+// VECIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 1), align 8
+
+// VECASM:      lgrl %r1, GlobVsi@GOT
+// VECASM-NEXT: vl   %v0, 0(%r1), 3
+// VECASM-NEXT: lgrl %r1, GlobS@GOT
+// VECASM-NEXT: vst  %v0, 8(%r1), 3
+//
+// VECASM:   .globl  GlobVsi
+// VECASM:   .p2align        3
+// VECASM: GlobVsi:
+// VECASM:   .space  16
+// VECASM:   .globl  GlobS
+// VECASM:   .p2align        3
+// VECASM: GlobS:
+// VECASM:   .space  24
+
+// SCALIR: %struct.S = type { i32, [12 x i8], <4 x i32> }
+// SCALIR: @GlobVsi = global <4 x i32> zeroinitializer, align 16
+// SCALIR: @GlobS = global %struct.S zeroinitializer, align 16
+// SCALIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 16
+// SCALIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 2), align 16
+
+// SCALASM:      lgrl    %r1, GlobVsi@GOT
+// SCALASM-NEXT: l       %r0, 0(%r1)
+// SCALASM-NEXT: l       %r2, 4(%r1)
+// SCALASM-NEXT: l       %r3, 8(%r1)
+// SCALASM-NEXT: l       %r4, 12(%r1)
+// SCALASM-NEXT: lgrl    %r1, GlobS@GOT
+// SCALASM-NEXT: st      %r4, 28(%r1)
+// SCALASM-NEXT: st      %r3, 24(%r1)
+// SCALASM-NEXT: st      %r2, 20(%r1)
+// SCALASM-NEXT: st      %r0, 16(%r1)
+//
+// SCALASM:   .globl  GlobVsi
+// SCALASM:   .p2align        4
+// SCALASM: GlobVsi:
+// SCALASM:   .space  16
+// SCALASM:   .globl  GlobS
+// SCALASM:   .p2align        4
+// SCALASM: GlobS:
+// SCALASM:   .space  32
+
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index e415083..e5ee17b 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -223,7 +223,7 @@
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -target-feature +soft-float -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
-// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 
 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ-VECTOR
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 31f8ee2..8c1be7d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -42,37 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
   initializeSystemZTDCPassPass(PR);
 }
 
-// Determine whether we use the vector ABI.
-static bool UsesVectorABI(StringRef CPU, StringRef FS) {
-  // We use the vector ABI whenever the vector facility is avaiable.
-  // This is the case by default if CPU is z13 or later, and can be
-  // overridden via "[+-]vector" feature string elements.
-  bool VectorABI = true;
-  bool SoftFloat = false;
-  if (CPU.empty() || CPU == "generic" ||
-      CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
-      CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
-    VectorABI = false;
-
-  SmallVector<StringRef, 3> Features;
-  FS.split(Features, ',', -1, false /* KeepEmpty */);
-  for (auto &Feature : Features) {
-    if (Feature == "vector" || Feature == "+vector")
-      VectorABI = true;
-    if (Feature == "-vector")
-      VectorABI = false;
-    if (Feature == "soft-float" || Feature == "+soft-float")
-      SoftFloat = true;
-    if (Feature == "-soft-float")
-      SoftFloat = false;
-  }
-
-  return VectorABI && !SoftFloat;
-}
-
-static std::string computeDataLayout(const Triple &TT, StringRef CPU,
-                                     StringRef FS) {
-  bool VectorABI = UsesVectorABI(CPU, FS);
+static std::string computeDataLayout(const Triple &TT) {
   std::string Ret;
 
   // Big endian.
@@ -92,10 +62,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // 128-bit floats are aligned only to 64 bits.
   Ret += "-f128:64";
 
-  // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
-  // bits. On z/OS, vector types are always aligned to 64 bits.
-  if (VectorABI || TT.isOSzOS())
-    Ret += "-v128:64";
+  // The DataLayout string always holds a vector alignment of 64 bits, see
+  // comment in clang/lib/Basic/Targets/SystemZ.h.
+  Ret += "-v128:64";
 
   // We prefer 16 bits of aligned for all globals; see above.
   Ret += "-a:8:16";
@@ -174,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                            Optional<CodeModel::Model> CM,
                                            CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(
-          T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+          T, computeDataLayout(TT), TT, CPU, FS, Options,
           getEffectiveRelocModel(RM),
           getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
           OL),
diff --git a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
index e8f9d35..99d96a6 100644
--- a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
+++ b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
@@ -38,11 +38,11 @@ entry:
 attributes #3 = { "target-cpu"="z14" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun3(<2 x double>* %A) #3 {
 ; CHECK-LABEL:     fun3:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14
@@ -111,11 +111,11 @@ entry:
 attributes #7 = { "target-cpu"="zEC12" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun7(<2 x double>* %A) #7 {
 ; CHECK-LABEL:     fun7:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14
diff --git a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
index c8ccae2..5a33197 100644
--- a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
@@ -1,55 +1,73 @@
-; Verify that we use the vector ABI datalayout if and only if
-; the vector facility is present.
+; Verify that a struct as generated by the frontend is correctly accessed in
+; both cases of enabling/disabling the vector facility.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=+soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=soft-float,-soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=-soft-float,soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 
-%struct.S = type { i8, <2 x i64> }
+%struct.S_vx = type { i8, <2 x i64> }
+%struct.S_novx = type { i8, [15 x i8], <2 x i64> }
 
-define void @test(%struct.S* %s) nounwind {
-; CHECK-VECTOR-LABEL: @test
+define void @fun_vx(%struct.S_vx* %s) nounwind {
+; CHECK-LABEL: @fun_vx
+;
 ; CHECK-VECTOR: vl %v0, 8(%r2)
-; CHECK-NOVECTOR-LABEL: @test
+; CHECK-VECTOR: vst %v0, 8(%r2), 3
+;
+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
+; CHECK-NOVECTOR-DAG: agsi 8(%r2), 1
+  %ptr = getelementptr %struct.S_vx, %struct.S_vx* %s, i64 0, i32 1
+  %vec = load <2 x i64>, <2 x i64>* %ptr
+  %add = add <2 x i64> %vec, <i64 1, i64 1>
+  store <2 x i64> %add, <2 x i64>* %ptr
+  ret void
+}
+
+define void @fun_novx(%struct.S_novx* %s) nounwind {
+; CHECK-LABEL: @fun_novx
+;
+; CHECK-VECTOR: vl  %v0, 16(%r2), 3
+; CHECK-VECTOR: vst %v0, 16(%r2), 3
+;
 ; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
 ; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
-  %ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
+  %ptr = getelementptr %struct.S_novx, %struct.S_novx* %s, i64 0, i32 2
   %vec = load <2 x i64>, <2 x i64>* %ptr
   %add = add <2 x i64> %vec, <i64 1, i64 1>
   store <2 x i64> %add, <2 x i64>* %ptr