gallivm: add support for 8/16-bit mul_hi
author Dave Airlie <airlied@redhat.com>
Thu, 8 Oct 2020 21:13:45 +0000 (07:13 +1000)
committer Dave Airlie <airlied@redhat.com>
Mon, 26 Oct 2020 01:03:51 +0000 (11:03 +1000)
This 32x32 code only needs small tweaks for this case.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7309>

.gitlab-ci/piglit/cl.txt
src/gallium/auxiliary/gallivm/lp_bld_arit.c

index d0073d8..a71928b 100644 (file)
@@ -51,16 +51,6 @@ program/execute/atomic_int64_xchg-local: skip
 program/execute/atomic_int64_xor-global: skip
 program/execute/atomic_int64_xor-global-return: skip
 program/execute/atomic_int64_xor-local: skip
-program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char1: fail
-program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char16: fail
-program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char2: fail
-program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char4: fail
-program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char8: fail
-program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char1: fail
-program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char16: fail
-program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char2: fail
-program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char4: fail
-program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char8: fail
 program/execute/builtin/builtin-char-popcount-1.2.generated: skip
 program/execute/builtin/builtin-float-cos-1.0.generated: timeout
 program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float1: fail
@@ -99,68 +89,13 @@ program/execute/builtin/builtin-float-sin-1.0.generated: timeout
 program/execute/builtin/builtin-float-sincos-1.0.generated: timeout
 program/execute/builtin/builtin-float-tan-1.0.generated: timeout
 program/execute/builtin/builtin-int-popcount-1.2.generated: skip
-program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long1: fail
-program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long16: fail
-program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long2: fail
-program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long4: fail
-program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long8: fail
-program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long1: fail
-program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long16: fail
-program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long2: fail
-program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long4: fail
-program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long8: fail
-program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long1: fail
-program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long16: fail
-program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long2: fail
-program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long4: fail
-program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long8: fail
 program/execute/builtin/builtin-long-popcount-1.2.generated: skip
-program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short1: fail
-program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short16: fail
-program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short2: fail
-program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short4: fail
-program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short8: fail
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short1: fail
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short16: fail
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short2: fail
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short4: fail
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short8: fail
 program/execute/builtin/builtin-short-popcount-1.2.generated: skip
 program/execute/builtin/builtin-shuffle-half-ushort: skip
 program/execute/builtin/builtin-shuffle2-half-ushort: skip
-program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar1: fail
-program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar16: fail
-program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar2: fail
-program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar4: fail
-program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar8: fail
-program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar1: fail
-program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar16: fail
-program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar2: fail
-program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar4: fail
-program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar8: fail
 program/execute/builtin/builtin-uchar-popcount-1.2.generated: skip
 program/execute/builtin/builtin-uint-popcount-1.2.generated: skip
-program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong1: fail
-program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong16: fail
-program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong2: fail
-program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong4: fail
-program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong8: fail
-program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong1: fail
-program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong16: fail
-program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong2: fail
-program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong4: fail
-program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong8: fail
 program/execute/builtin/builtin-ulong-popcount-1.2.generated: skip
-program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort1: fail
-program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort16: fail
-program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort2: fail
-program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort4: fail
-program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort8: fail
-program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort1: fail
-program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort16: fail
-program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort2: fail
-program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort4: fail
-program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort8: fail
 program/execute/builtin/builtin-ushort-popcount-1.2.generated: skip
 program/execute/call-clobbers-amdgcn: skip
 program/execute/calls-large-struct: crash
@@ -191,8 +126,8 @@ program/execute/vstore/vstore-half-private: skip
 summary:
        name:  results
        ----  --------
-       pass:     3672
-       fail:      107
+       pass:     3737
+       fail:       42
       crash:        6
        skip:       73
     timeout:        4
index 717ab14..165d73d 100644 (file)
@@ -1178,8 +1178,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
 
 
 /*
- * Widening mul, valid for 32x32 bit -> 64bit only.
- * Result is low 32bits, high bits returned in res_hi.
+ * Widening mul, valid for 8, 16 and 32-bit inputs (8/16 widen to 32 bits, 32 to 64).
+ * Result is low bits, high bits returned in res_hi.
  *
  * Emits generic code.
  */
@@ -1197,9 +1197,12 @@ lp_build_mul_32_lohi(struct lp_build_context *bld,
 
    type_tmp = bld->type;
    narrow_type = lp_build_vec_type(gallivm, type_tmp);
-   type_tmp.width *= 2;
+   if (bld->type.width < 32)
+      type_tmp.width = 32;
+   else
+      type_tmp.width *= 2;
    wide_type = lp_build_vec_type(gallivm, type_tmp);
-   shift = lp_build_const_vec(gallivm, type_tmp, 32);
+   shift = lp_build_const_vec(gallivm, type_tmp, bld->type.width);
 
    if (bld->type.sign) {
       a = LLVMBuildSExt(builder, a, wide_type, "");