From 90dc78bc62784faaa55afb0320cf3c2187d80ac6 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Mon, 17 Feb 2020 17:03:52 +0000
Subject: [PATCH] [ARM,MVE] Add intrinsics for abs, neg and not operations.

Summary:
This commit adds the unpredicated intrinsics for the unary operations
vabsq (absolute value), vnegq (arithmetic negation), vmvnq (bitwise
complement), vqabsq and vqnegq (saturating versions of abs and neg for
signed integers, in the sense that they give INT_MAX if an input lane
is INT_MIN).

This is done entirely in clang: all of these operations have existing
isel patterns and existing tests for them on the LLVM side, so I've
just made clang emit the same IR that those patterns already match.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74331
---
 clang/include/clang/Basic/arm_mve.td           |  27 ++
 clang/include/clang/Basic/arm_mve_defs.td      |   4 +
 clang/lib/CodeGen/CGBuiltin.cpp                |  13 +
 clang/test/CodeGen/arm-mve-intrinsics/absneg.c | 338 +++++++++++++++++++++++++
 4 files changed, 382 insertions(+)
 create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/absneg.c

diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 5cd88b0..dfc0ee8 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -234,6 +234,33 @@ let params = T.Unsigned in {
   defm vdwdup: vxdup_mc<(? u32:$limit, imm_1248:$step), (? $limit, $step)>;
 }
 
+let params = T.Int in {
+def vmvnq: Intrinsic<Vector, (args Vector:$a), (xor $a, (uint_max Vector))>;
+}
+let params = T.Signed in {
+def vnegq: Intrinsic<Vector, (args Vector:$a),
+                     (sub (zeroinit Vector), $a)>;
+def vabsq: Intrinsic<Vector, (args Vector:$a),
+                     (select (icmp_slt $a, (zeroinit Vector)),
+                             (sub (zeroinit Vector), $a), $a)>;
+def vqnegq: Intrinsic<Vector, (args Vector:$a),
+                      (select (icmp_eq $a, (int_min Vector)),
+                              (int_max Vector),
+                              (sub (zeroinit Vector), $a))>;
+def vqabsq: Intrinsic<Vector, (args Vector:$a),
+                      (select (icmp_sgt $a, (zeroinit Vector)), $a,
+                              (select (icmp_eq $a, (int_min Vector)),
+                                      (int_max Vector),
+                                      (sub (zeroinit Vector), $a)))>;
+}
+let params = T.Float in {
+def vnegq_f: Intrinsic<Vector, (args Vector:$a), (fneg $a)>,
+             NameOverride<"vnegq">;
+def vabsq_f: Intrinsic<Vector, (args Vector:$a),
+                       (IRIntBase<"fabs", [Vector]> $a)>,
+             NameOverride<"vabsq">;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index d4e8215..2d080f2 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -98,6 +98,9 @@ def extend: CGHelperFn<"SignOrZeroExtend"> {
   let special_params = [IRBuilderIntParam<2, "bool">];
 }
 def zeroinit: IRFunction<"llvm::Constant::getNullValue">;
+def int_min: CGHelperFn<"ARMMVEConstantSplat<1,0>">;
+def int_max: CGHelperFn<"ARMMVEConstantSplat<0,1>">;
+def uint_max: CGHelperFn<"ARMMVEConstantSplat<1,1>">;
 def undef: IRFunction<"UndefValue::get">;
 def icmp_eq: IRBuilder<"CreateICmpEQ">;
 def icmp_ne: IRBuilder<"CreateICmpNE">;
@@ -117,6 +120,7 @@ def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
 def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
+def fneg: IRBuilder<"CreateFNeg">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5e411bc..0081740 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7056,6 +7056,19 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
   }
 }
 
+template <unsigned HighBit, unsigned OtherBits>
+static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
+  // MVE-specific helper function to make a vector splat of a constant such as
+  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
+  llvm::Type *T = VT->getVectorElementType();
+  unsigned LaneBits = T->getPrimitiveSizeInBits();
+  uint32_t Value = HighBit << (LaneBits - 1);
+  if (OtherBits)
+    Value |= (1UL << (LaneBits - 1)) - 1;
+  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
+  return ARMMVEVectorSplat(Builder, Lane);
+}
+
 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               ReturnValueSlot ReturnValue,
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
new file mode 100644
index 0000000..db4253f
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@@ -0,0 +1,338 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vabsq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vabsq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vabsq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[A]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vmvnq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vmvnq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vmvnq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vmvnq_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vmvnq_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vmvnq_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]]
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vnegq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[A:%.*]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vnegq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vnegq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vnegq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vnegq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[TMP3]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+int8x16_t test_vqabsq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[TMP3]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP4]]
+//
+int16x8_t test_vqabsq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[TMP3]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+int32x4_t test_vqabsq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP1]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqnegq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP1]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqnegq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP1]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqnegq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
-- 
2.7.4
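
A minimal usage sketch of the saturating operations described in the summary
(not part of the patch; it assumes an MVE-enabled target, the <arm_mve.h>
intrinsics exercised by the tests above, and the pre-existing vdupq_n_s32
splat intrinsic):

    #include <arm_mve.h>
    #include <stdint.h>

    /* vqabsq/vqnegq saturate: a lane holding INT32_MIN becomes INT32_MAX
     * instead of wrapping back to INT32_MIN as plain vabsq/vnegq would. */
    int32x4_t saturating_abs_demo(void)
    {
        int32x4_t v = vdupq_n_s32(INT32_MIN); /* every lane is INT32_MIN */
        return vqabsq_s32(v);                 /* every lane is INT32_MAX */
    }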
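
The ARMMVEConstantSplat<HighBit, OtherBits> helper added to CGBuiltin.cpp
builds each lane constant from two flags: the top bit and all bits below it.
A stand-alone C sketch of the same per-lane computation (illustrative only;
lane_constant is a hypothetical name, and the real helper then splats the
value across the vector):

    #include <stdint.h>
    #include <stdio.h>

    /* Same bit arithmetic as the helper:
     *   <1,0> -> INT_MIN, <0,1> -> INT_MAX, <1,1> -> UINT_MAX */
    static uint32_t lane_constant(unsigned HighBit, unsigned OtherBits,
                                  unsigned LaneBits)
    {
        uint32_t Value = (uint32_t)HighBit << (LaneBits - 1);
        if (OtherBits)
            Value |= (1UL << (LaneBits - 1)) - 1;
        return Value;
    }

    int main(void)
    {
        printf("%#x %#x %#x\n",
               lane_constant(1, 0, 16),  /* 0x8000: INT16_MIN  */
               lane_constant(0, 1, 16),  /* 0x7fff: INT16_MAX  */
               lane_constant(1, 1, 16)); /* 0xffff: UINT16_MAX */
        return 0;
    }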