[X86] Allow Yz inline assembly constraint to choose ymm0 or zmm0 when avx/avx512...

author Craig Topper <craig.topper@gmail.com>

Wed, 6 May 2020 02:47:39 +0000 (19:47 -0700)

committer Craig Topper <craig.topper@gmail.com>

Wed, 6 May 2020 04:12:30 +0000 (21:12 -0700)
author Craig Topper <craig.topper@gmail.com>
Wed, 6 May 2020 02:47:39 +0000 (19:47 -0700)
committer Craig Topper <craig.topper@gmail.com>
Wed, 6 May 2020 04:12:30 +0000 (21:12 -0700)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp

index 747d076..b0fecd6 100644 (file)
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1898,8 +1898,14 @@ bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
        return Size <= 64;
      case 'z':
      case '0':
-      // XMM0
-      if (FeatureMap.lookup("sse"))
+      // XMM0/YMM/ZMM0
+      if (FeatureMap.lookup("avx512f"))
+        // ZMM0 can be used if target supports AVX512F.
+        return Size <= 512U;
+      else if (FeatureMap.lookup("avx"))
+        // YMM0 can be used if target supports AVX.
+        return Size <= 256U;
+      else if (FeatureMap.lookup("sse"))
          return Size <= 128U;
        return false;
      case 'i':
diff --git a/clang/test/CodeGen/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/x86-inline-asm-v-constraint.c

index d335e4b..215cccf 100644 (file)
--- a/clang/test/CodeGen/x86-inline-asm-v-constraint.c
+++ b/clang/test/CodeGen/x86-inline-asm-v-constraint.c
@@ -28,3 +28,28 @@ __m512 testZMM(__m512 _zmm0, __m512 _zmm1) {
  #endif
    return _zmm0;
  }
+
+// SSE: call <4 x float> asm "pcmpeqd $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m128 testXMM0(void) {
+  __m128 xmm0;
+  __asm__("pcmpeqd %0, %0" :"=Yz"(xmm0));
+  return xmm0;
+}
+
+// AVX: call <8 x float> asm "vpcmpeqd $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m256 testYMM0(void) {
+  __m256 ymm0;
+#ifdef AVX
+  __asm__("vpcmpeqd %0, %0, %0" :"=Yz"(ymm0));
+#endif
+  return ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$255, $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m512 testZMM0(void) {
+  __m512 zmm0;
+#ifdef AVX512
+  __asm__("vpternlogd $255, %0, %0, %0" :"=Yz"(zmm0));
+#endif
+  return zmm0;
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 57db959..35c6d24 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48046,7 +48046,9 @@ TargetLowering::ConstraintWeight
        // XMM0
        case 'z':
        case '0':
-        if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+        if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+            ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
+            ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
            return CW_SpecificReg;
          return CW_Invalid;
        // Conditional OpMask regs (AVX512)
@@ -48496,6 +48498,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
          if (Subtarget.hasAVX())
            return std::make_pair(0U, &X86::VR256RegClass);
          break;
+      case MVT::v64i8:
+      case MVT::v32i16:
        case MVT::v8f64:
        case MVT::v16f32:
        case MVT::v16i32:
@@ -48521,7 +48525,42 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
      case 'z':
      case '0':
        if (!Subtarget.hasSSE1()) break;
-      return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+      switch (VT.SimpleTy) {
+      default: break;
+      // Scalar SSE types.
+      case MVT::f32:
+      case MVT::i32:
+        return std::make_pair(X86::XMM0, &X86::FR32RegClass);
+      case MVT::f64:
+      case MVT::i64:
+        return std::make_pair(X86::XMM0, &X86::FR64RegClass);
+      case MVT::f128:
+      case MVT::v16i8:
+      case MVT::v8i16:
+      case MVT::v4i32:
+      case MVT::v2i64:
+      case MVT::v4f32:
+      case MVT::v2f64:
+        return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+      // AVX types.
+      case MVT::v32i8:
+      case MVT::v16i16:
+      case MVT::v8i32:
+      case MVT::v4i64:
+      case MVT::v8f32:
+      case MVT::v4f64:
+        if (Subtarget.hasAVX())
+          return std::make_pair(X86::YMM0, &X86::VR256RegClass);
+        break;
+      case MVT::v8f64:
+      case MVT::v16f32:
+      case MVT::v16i32:
+      case MVT::v8i64:
+        if (Subtarget.hasAVX512())
+          return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
+        break;
+      }
+      break;
      case 'k':
        // This register class doesn't allocate k0 for masked vector operation.
        if (Subtarget.hasAVX512()) {
diff --git a/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll

index 2c8de16..e5b2b19 100644 (file)
--- a/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
@@ -134,3 +134,13 @@ entry:
    ret <8 x float> %0
  }
  
+define <8 x float> @testYMM0() {
+; CHECK: vpcmpeqd %ymm0, %ymm0, %ymm0
+entry:
+  %ymm0 = alloca <8 x float>, align 32
+  %0 = call <8 x float> asm "vpcmpeqd $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+  store <8 x float> %0, <8 x float>* %ymm0, align 32
+  %1 = load <8 x float>, <8 x float>* %ymm0, align 32
+  ret <8 x float> %1
+}
+
diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll

index 019973b..a712550 100644 (file)
--- a/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
@@ -70,3 +70,12 @@ entry:
    ret <16 x float> %0
  }
  
+define <16 x float> @testZMM0() {
+entry:
+; CHECK: vpternlogd $255, %zmm0, %zmm0, %zmm0
+  %zmm0 = alloca <16 x float>, align 64
+  %0 = call <16 x float> asm "vpternlogd $$255, $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+  store <16 x float> %0, <16 x float>* %zmm0, align 64
+  %1 = load <16 x float>, <16 x float>* %zmm0, align 64
+  ret <16 x float> %1
+}
author	Craig Topper <craig.topper@gmail.com>
	Wed, 6 May 2020 02:47:39 +0000 (19:47 -0700)
committer	Craig Topper <craig.topper@gmail.com>
	Wed, 6 May 2020 04:12:30 +0000 (21:12 -0700)
clang/lib/Basic/Targets/X86.cpp		patch \| blob \| history
clang/test/CodeGen/x86-inline-asm-v-constraint.c		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll		patch \| blob \| history
llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll		patch \| blob \| history