[SystemZ] Improve handling of vector alignments.

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Thu, 4 Aug 2022 10:16:44 +0000 (12:16 +0200)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Thu, 8 Sep 2022 15:33:05 +0000 (17:33 +0200)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Thu, 4 Aug 2022 10:16:44 +0000 (12:16 +0200)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Thu, 8 Sep 2022 15:33:05 +0000 (17:33 +0200)
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h

index 7def024d07a77878f6fca24a03ad9b7bdc430a7a..371eb25163786d3bf63547d99d48d8599df67330 100644 (file)
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -51,13 +51,13 @@ public:
        // All vector types are default aligned on an 8-byte boundary, even if the
        // vector facility is not available. That is different from Linux.
        MaxVectorAlign = 64;
-      // Compared to Linux/ELF, the data layout differs only in some details:
-      // - name mangling is GOFF
-      // - 128 bit vector types are 64 bit aligned
+      // Compared to Linux/ELF, the data layout differs only in that name
+      // mangling is GOFF.
        resetDataLayout(
            "E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
      } else
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
+      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+                      "-v128:64-a:8:16-n32:64");
      MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
      HasStrictFP = true;
    }
@@ -171,12 +171,14 @@ public:
      }
      HasVector &= !SoftFloat;
  
-    // If we use the vector ABI, vector types are 64-bit aligned.
-    if (HasVector && !getTriple().isOSzOS()) {
+    // If we use the vector ABI, vector types are 64-bit aligned. The
+    // DataLayout string is always set to this alignment as it is not a
+    // requirement that it follows the alignment emitted by the front end. It
+    // is assumed generally that the DataLayout should reflect only the
+    // target triple and not any specific feature.
+    if (HasVector && !getTriple().isOSzOS())
        MaxVectorAlign = 64;
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
-                      "-v128:64-a:8:16-n32:64");
-    }
+
      return true;
    }
  
diff --git a/clang/test/CodeGen/SystemZ/align-systemz-02.c b/clang/test/CodeGen/SystemZ/align-systemz-02.c

new file mode 100644 (file)

index 0000000..cc09a90
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/align-systemz-02.c
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=VECIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=VECASM
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=SCALIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=SCALASM
+
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+
+volatile vec_sint GlobVsi;
+
+struct S {
+  int A;
+  vec_sint Vsi;
+} GlobS;
+
+void fun() {
+  GlobS.Vsi = GlobVsi;
+}
+
+// VECIR: %struct.S = type { i32, <4 x i32> }
+// VECIR: @GlobVsi = global <4 x i32> zeroinitializer, align 8
+// VECIR: @GlobS = global %struct.S zeroinitializer, align 8
+// VECIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 8
+// VECIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 1), align 8
+
+// VECASM:      lgrl %r1, GlobVsi@GOT
+// VECASM-NEXT: vl   %v0, 0(%r1), 3
+// VECASM-NEXT: lgrl %r1, GlobS@GOT
+// VECASM-NEXT: vst  %v0, 8(%r1), 3
+//
+// VECASM:   .globl  GlobVsi
+// VECASM:   .p2align        3
+// VECASM: GlobVsi:
+// VECASM:   .space  16
+// VECASM:   .globl  GlobS
+// VECASM:   .p2align        3
+// VECASM: GlobS:
+// VECASM:   .space  24
+
+// SCALIR: %struct.S = type { i32, [12 x i8], <4 x i32> }
+// SCALIR: @GlobVsi = global <4 x i32> zeroinitializer, align 16
+// SCALIR: @GlobS = global %struct.S zeroinitializer, align 16
+// SCALIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 16
+// SCALIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 2), align 16
+
+// SCALASM:      lgrl    %r1, GlobVsi@GOT
+// SCALASM-NEXT: l       %r0, 0(%r1)
+// SCALASM-NEXT: l       %r2, 4(%r1)
+// SCALASM-NEXT: l       %r3, 8(%r1)
+// SCALASM-NEXT: l       %r4, 12(%r1)
+// SCALASM-NEXT: lgrl    %r1, GlobS@GOT
+// SCALASM-NEXT: st      %r4, 28(%r1)
+// SCALASM-NEXT: st      %r3, 24(%r1)
+// SCALASM-NEXT: st      %r2, 20(%r1)
+// SCALASM-NEXT: st      %r0, 16(%r1)
+//
+// SCALASM:   .globl  GlobVsi
+// SCALASM:   .p2align        4
+// SCALASM: GlobVsi:
+// SCALASM:   .space  16
+// SCALASM:   .globl  GlobS
+// SCALASM:   .p2align        4
+// SCALASM: GlobS:
+// SCALASM:   .space  32
+
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c

index e4150837279cea5c142479f776b87062fe457cd9..e5ee17bc38a7f0fa48e9dd6e96eed00ae5abe896 100644 (file)
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -223,7 +223,7 @@
  // RUN: FileCheck %s -check-prefix=SYSTEMZ
  // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -target-feature +soft-float -o - -emit-llvm %s | \
  // RUN: FileCheck %s -check-prefix=SYSTEMZ
-// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
  
  // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -o - -emit-llvm %s | \
  // RUN: FileCheck %s -check-prefix=SYSTEMZ-VECTOR
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp

index 31f8ee2f894d0ece5dcf8b9a487b45a6a4abef67..8c1be7d4949da2af5300a6be4810afe250aac4f7 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -42,37 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
    initializeSystemZTDCPassPass(PR);
  }
  
-// Determine whether we use the vector ABI.
-static bool UsesVectorABI(StringRef CPU, StringRef FS) {
-  // We use the vector ABI whenever the vector facility is avaiable.
-  // This is the case by default if CPU is z13 or later, and can be
-  // overridden via "[+-]vector" feature string elements.
-  bool VectorABI = true;
-  bool SoftFloat = false;
-  if (CPU.empty() || CPU == "generic" ||
-      CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
-      CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
-    VectorABI = false;
-
-  SmallVector<StringRef, 3> Features;
-  FS.split(Features, ',', -1, false /* KeepEmpty */);
-  for (auto &Feature : Features) {
-    if (Feature == "vector" || Feature == "+vector")
-      VectorABI = true;
-    if (Feature == "-vector")
-      VectorABI = false;
-    if (Feature == "soft-float" || Feature == "+soft-float")
-      SoftFloat = true;
-    if (Feature == "-soft-float")
-      SoftFloat = false;
-  }
-
-  return VectorABI && !SoftFloat;
-}
-
-static std::string computeDataLayout(const Triple &TT, StringRef CPU,
-                                     StringRef FS) {
-  bool VectorABI = UsesVectorABI(CPU, FS);
+static std::string computeDataLayout(const Triple &TT) {
    std::string Ret;
  
    // Big endian.
@@ -92,10 +62,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
    // 128-bit floats are aligned only to 64 bits.
    Ret += "-f128:64";
  
-  // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
-  // bits. On z/OS, vector types are always aligned to 64 bits.
-  if (VectorABI || TT.isOSzOS())
-    Ret += "-v128:64";
+  // The DataLayout string always holds a vector alignment of 64 bits, see
+  // comment in clang/lib/Basic/Targets/SystemZ.h.
+  Ret += "-v128:64";
  
    // We prefer 16 bits of aligned for all globals; see above.
    Ret += "-a:8:16";
@@ -174,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                             Optional<CodeModel::Model> CM,
                                             CodeGenOpt::Level OL, bool JIT)
      : LLVMTargetMachine(
-          T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+          T, computeDataLayout(TT), TT, CPU, FS, Options,
            getEffectiveRelocModel(RM),
            getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
            OL),
diff --git a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll

index e8f9d359722fef3d99d24a811cf0da189d561c38..99d96a6cd05ee2353402b54efba8d4b1b55f35e3 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
+++ b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
@@ -38,11 +38,11 @@ entry:
  attributes #3 = { "target-cpu"="z14" "target-features"="+vector" "use-soft-float"="false" }
  define <2 x double> @fun3(<2 x double>* %A) #3 {
  ; CHECK-LABEL:     fun3:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
  ; SOFT-FLOAT:      lg %r0, 0(%r2)
  ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
  ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
  ; NO-VECTOR:       ld %f0, 0(%r2)
  ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
  ; CHECK-NEXT:      br %r14
@@ -111,11 +111,11 @@ entry:
  attributes #7 = { "target-cpu"="zEC12" "target-features"="+vector" "use-soft-float"="false" }
  define <2 x double> @fun7(<2 x double>* %A) #7 {
  ; CHECK-LABEL:     fun7:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
  ; SOFT-FLOAT:      lg %r0, 0(%r2)
  ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
  ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
  ; NO-VECTOR:       ld %f0, 0(%r2)
  ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
  ; CHECK-NEXT:      br %r14
diff --git a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll

index c8ccae2561abffb8d4704398fb97b25daeda9cde..5a33197b62a552016749d6b155770f6627e52cc2 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
@@ -1,55 +1,73 @@
-; Verify that we use the vector ABI datalayout if and only if
-; the vector facility is present.
+; Verify that a struct as generated by the frontend is correctly accessed in
+; both cases of enabling/disabling the vector facility.
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=+soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
  ; RUN:   -mattr=soft-float,-soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
  ; RUN:   -mattr=-soft-float,soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
  
-%struct.S = type { i8, <2 x i64> }
+%struct.S_vx = type { i8, <2 x i64> }
+%struct.S_novx = type { i8, [15 x i8], <2 x i64> }
  
-define void @test(%struct.S* %s) nounwind {
-; CHECK-VECTOR-LABEL: @test
+define void @fun_vx(%struct.S_vx* %s) nounwind {
+; CHECK-LABEL: @fun_vx
+;
  ; CHECK-VECTOR: vl %v0, 8(%r2)
-; CHECK-NOVECTOR-LABEL: @test
+; CHECK-VECTOR: vst %v0, 8(%r2), 3
+;
+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
+; CHECK-NOVECTOR-DAG: agsi 8(%r2), 1
+  %ptr = getelementptr %struct.S_vx, %struct.S_vx* %s, i64 0, i32 1
+  %vec = load <2 x i64>, <2 x i64>* %ptr
+  %add = add <2 x i64> %vec, <i64 1, i64 1>
+  store <2 x i64> %add, <2 x i64>* %ptr
+  ret void
+}
+
+define void @fun_novx(%struct.S_novx* %s) nounwind {
+; CHECK-LABEL: @fun_novx
+;
+; CHECK-VECTOR: vl  %v0, 16(%r2), 3
+; CHECK-VECTOR: vst %v0, 16(%r2), 3
+;
  ; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
  ; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
-  %ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
+  %ptr = getelementptr %struct.S_novx, %struct.S_novx* %s, i64 0, i32 2
    %vec = load <2 x i64>, <2 x i64>* %ptr
    %add = add <2 x i64> %vec, <i64 1, i64 1>
    store <2 x i64> %add, <2 x i64>* %ptr
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Thu, 4 Aug 2022 10:16:44 +0000 (12:16 +0200)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Thu, 8 Sep 2022 15:33:05 +0000 (17:33 +0200)
clang/lib/Basic/Targets/SystemZ.h		patch \| blob \| history
clang/test/CodeGen/SystemZ/align-systemz-02.c	[new file with mode: 0644]	patch \| blob
clang/test/CodeGen/target-data.c		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp		patch \| blob \| history
llvm/test/CodeGen/SystemZ/function-attributes-01.ll		patch \| blob \| history
llvm/test/CodeGen/SystemZ/vec-abi-align.ll		patch \| blob \| history