gallivm: fix saturated signed add / sub with llvm 9

author Roland Scheidegger <sroland@vmware.com>

Wed, 17 Apr 2019 00:34:01 +0000 (02:34 +0200)

committer Roland Scheidegger <sroland@vmware.com>

Wed, 17 Apr 2019 15:42:13 +0000 (17:42 +0200)
author Roland Scheidegger <sroland@vmware.com>
Wed, 17 Apr 2019 00:34:01 +0000 (02:34 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 17 Apr 2019 15:42:13 +0000 (17:42 +0200)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 057c50e..02fb81a 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -555,6 +555,12 @@ lp_build_add(struct lp_build_context *bld,
          return bld->one;
  
        if (!type.floating && !type.fixed) {
+         if (HAVE_LLVM >= 0x0900) {
+            char intrin[32];
+            intrinsic = type.sign ? "llvm.sadd.sat" : "llvm.uadd.sat";
+            lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+            return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
+         }
           if (type.width * type.length == 128) {
              if (util_cpu_caps.has_sse2) {
                 if (type.width == 8)
@@ -625,6 +631,7 @@ lp_build_add(struct lp_build_context *bld,
            * NOTE: cmp/select does sext/trunc of the mask. Does not seem to
            * interfere with llvm's ability to recognize the pattern but seems
            * a bit brittle.
+          * NOTE: llvm 9+ always uses (non arch specific) intrinsic.
            */
           LLVMValueRef overflowed = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, res);
           res = lp_build_select(bld, overflowed,
@@ -876,6 +883,12 @@ lp_build_sub(struct lp_build_context *bld,
          return bld->zero;
  
        if (!type.floating && !type.fixed) {
+         if (HAVE_LLVM >= 0x0900) {
+            char intrin[32];
+            intrinsic = type.sign ? "llvm.ssub.sat" : "llvm.usub.sat";
+            lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+            return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
+         }
           if (type.width * type.length == 128) {
              if (util_cpu_caps.has_sse2) {
                 if (type.width == 8)
@@ -925,6 +938,7 @@ lp_build_sub(struct lp_build_context *bld,
            * NOTE: cmp/select does sext/trunc of the mask. Does not seem to
            * interfere with llvm's ability to recognize the pattern but seems
            * a bit brittle.
+          * NOTE: llvm 9+ always uses (non arch specific) intrinsic.
            */
           LLVMValueRef no_ov = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
           a = lp_build_select(bld, no_ov, a, b);
author	Roland Scheidegger <sroland@vmware.com>
	Wed, 17 Apr 2019 00:34:01 +0000 (02:34 +0200)
committer	Roland Scheidegger <sroland@vmware.com>
	Wed, 17 Apr 2019 15:42:13 +0000 (17:42 +0200)