log10: Use sw implementation from amd builtins
author Jan Vesely <jan.vesely@rutgers.edu>
Mon, 23 Apr 2018 21:10:42 +0000 (21:10 +0000)
committer Jan Vesely <jan.vesely@rutgers.edu>
Mon, 23 Apr 2018 21:10:42 +0000 (21:10 +0000)
Add missing table.
Fixes log10d CTS on carrizo.
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Acked-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 330649

libclc/generic/lib/math/log10.cl
libclc/generic/lib/math/log10.inc [deleted file]
libclc/generic/lib/math/log_base.h
libclc/generic/lib/math/tables.cl
libclc/generic/lib/math/tables.h

diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl
index 8216f9b..35a53a1 100644 (file)
@@ -1,4 +1,39 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
 #include <clc/clc.h>
+#include "../clcmacro.h"
+#include "tables.h"
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif // cl_khr_fp64
+
+#define COMPILING_LOG10 // selects the `#elif defined(COMPILING_LOG10)` branches of log_base.h (log10 constants and log10_tbl lookup)
+#include "log_base.h" // textual include of the shared log template; presumably yields the scalar log10 definitions vectorized below - TODO confirm
+#undef COMPILING_LOG10 // keep the selector macro from leaking past the include
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float); // presumably generates the float vector overloads from the scalar log10 - confirm in clcmacro.h
 
-#define __CLC_BODY <log10.inc>
-#include <clc/math/gentype.inc>
+#ifdef cl_khr_fp64
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double); // double counterpart, only when cl_khr_fp64 is defined
+#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/log10.inc b/libclc/generic/lib/math/log10.inc
deleted file mode 100644 (file)
index 423308a..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) {
-  // log10(x) = log2(x) / log2(10)
-  // 1 / log2(10) = 0.30102999566 = log10(2)
-  // SP representation is 0.30103 (0x1.344136p-2)
-  // DP representation is 0.301029995659999993762312442414(0x1.34413509E61D8p-2)
-#if __CLC_FPSIZE == 32
-  return log2(val) * 0x1.344136p-2f;
-#elif __CLC_FPSIZE == 64
-  return log2(val) * 0x1.34413509E61D8p-2;
-#else
-#error unknown _CLC_FPSIZE
-#endif
-}
diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h
index bf2f82b..f5b6f1c 100644 (file)
@@ -92,14 +92,12 @@ log(float x)
     const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375
     const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
 #elif defined(COMPILING_LOG10)
-    USE_TABLE(float2, p_log, LOG10_TBL);
     const float LOG10E = 0x1.bcb7b2p-2f;        // 0.43429448190325182
     const float LOG10E_HEAD = 0x1.bc0000p-2f;   // 0.43359375
     const float LOG10E_TAIL = 0x1.6f62a4p-11f;  // 0.0007007319
     const float LOG10_2_HEAD = 0x1.340000p-2f;  // 0.30078125
     const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
 #else
-    USE_TABLE(float2, p_log, LOGE_TBL);
     const float LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
     const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
 #endif
@@ -158,11 +156,11 @@ log(float x)
     z1 = tv.s0 + mf;
     z2 = mad(poly, -LOG2E, tv.s1);
 #elif defined(COMPILING_LOG10)
-    float2 tv = p_log[indx];
+    float2 tv = USE_TABLE(log10_tbl, indx);
     z1 = mad(mf, LOG10_2_HEAD, tv.s0);
     z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
 #else
-    float2 tv = p_log[indx];
+    float2 tv = USE_TABLE(log_tbl, indx);
     z1 = mad(mf, LOG2_HEAD, tv.s0);
     z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
 #endif
diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl
index b72fddd..596487c 100644 (file)
@@ -552,6 +552,138 @@ DECLARE_TABLE(float2, LOG2_TBL, 129) = {
     (float2)(0x1.000000p+0f, 0x0.000000p+0f)
 };
 
+DECLARE_TABLE(float2, LOG10_TBL, 129) = { // 129 float2 pairs; exposed as log10_tbl via TABLE_FUNCTION and read in log_base.h as tv.s0 (added into z1) and tv.s1 (added into z2)
+    (float2)(0x0.000000p+0f, 0x0.000000p+0f), // index 0: both components are zero; entry i appears to be log10(1 + i/128) split as coarse head (.s0) + small tail (.s1) - confirm against the table generator
+    (float2)(0x1.ba8000p-9f, 0x1.f51c88p-19f),
+    (float2)(0x1.b90000p-8f, 0x1.1da93ep-18f),
+    (float2)(0x1.498000p-7f, 0x1.8428a2p-18f),
+    (float2)(0x1.b58000p-7f, 0x1.a423acp-17f),
+    (float2)(0x1.108000p-6f, 0x1.41d422p-17f),
+    (float2)(0x1.458000p-6f, 0x1.d3d6b2p-16f),
+    (float2)(0x1.7a8000p-6f, 0x1.70f7cep-16f),
+    (float2)(0x1.af0000p-6f, 0x1.7e4ac0p-16f),
+    (float2)(0x1.e38000p-6f, 0x1.ab2f40p-24f),
+    (float2)(0x1.0b8000p-5f, 0x1.00d40ap-16f),
+    (float2)(0x1.250000p-5f, 0x1.40b03ep-15f),
+    (float2)(0x1.3e8000p-5f, 0x1.446668p-15f),
+    (float2)(0x1.580000p-5f, 0x1.1c7758p-16f),
+    (float2)(0x1.710000p-5f, 0x1.20d09ep-15f),
+    (float2)(0x1.8a0000p-5f, 0x1.fd6f5cp-16f),
+    (float2)(0x1.a30000p-5f, 0x1.53ac12p-18f),
+    (float2)(0x1.bb8000p-5f, 0x1.4d02c6p-16f),
+    (float2)(0x1.d40000p-5f, 0x1.d5164ep-17f),
+    (float2)(0x1.ec0000p-5f, 0x1.991facp-15f),
+    (float2)(0x1.020000p-4f, 0x1.0a307cp-14f),
+    (float2)(0x1.0e0000p-4f, 0x1.e94ec0p-15f),
+    (float2)(0x1.1a0000p-4f, 0x1.1a22a8p-15f),
+    (float2)(0x1.258000p-4f, 0x1.d4857ap-14f),
+    (float2)(0x1.318000p-4f, 0x1.982ae2p-15f),
+    (float2)(0x1.3d0000p-4f, 0x1.74cd70p-14f),
+    (float2)(0x1.488000p-4f, 0x1.cfb476p-14f),
+    (float2)(0x1.540000p-4f, 0x1.ddcc64p-14f),
+    (float2)(0x1.5f8000p-4f, 0x1.a01222p-14f),
+    (float2)(0x1.6b0000p-4f, 0x1.177dbcp-14f),
+    (float2)(0x1.768000p-4f, 0x1.140a24p-16f),
+    (float2)(0x1.818000p-4f, 0x1.298f40p-14f),
+    (float2)(0x1.8c8000p-4f, 0x1.c60e20p-14f),
+    (float2)(0x1.980000p-4f, 0x1.b65052p-18f),
+    (float2)(0x1.a30000p-4f, 0x1.53ac12p-17f),
+    (float2)(0x1.ad8000p-4f, 0x1.f41d04p-14f),
+    (float2)(0x1.b88000p-4f, 0x1.7934eap-14f),
+    (float2)(0x1.c38000p-4f, 0x1.75252ep-15f),
+    (float2)(0x1.ce0000p-4f, 0x1.b90790p-14f),
+    (float2)(0x1.d90000p-4f, 0x1.d5866ap-16f),
+    (float2)(0x1.e38000p-4f, 0x1.e0d586p-15f),
+    (float2)(0x1.ee0000p-4f, 0x1.2ae984p-14f),
+    (float2)(0x1.f88000p-4f, 0x1.25a0d0p-14f),
+    (float2)(0x1.018000p-3f, 0x1.c2a064p-15f),
+    (float2)(0x1.068000p-3f, 0x1.2f59e8p-13f),
+    (float2)(0x1.0b8000p-3f, 0x1.cf424cp-13f),
+    (float2)(0x1.110000p-3f, 0x1.42f080p-15f),
+    (float2)(0x1.160000p-3f, 0x1.684156p-14f),
+    (float2)(0x1.1b0000p-3f, 0x1.f38f64p-14f),
+    (float2)(0x1.200000p-3f, 0x1.22077ap-13f),
+    (float2)(0x1.250000p-3f, 0x1.2d34d6p-13f),
+    (float2)(0x1.2a0000p-3f, 0x1.1ba328p-13f),
+    (float2)(0x1.2f0000p-3f, 0x1.db48e2p-14f),
+    (float2)(0x1.340000p-3f, 0x1.4712a0p-14f),
+    (float2)(0x1.390000p-3f, 0x1.ed0894p-16f),
+    (float2)(0x1.3d8000p-3f, 0x1.bc39b6p-13f),
+    (float2)(0x1.428000p-3f, 0x1.1f9ff8p-13f),
+    (float2)(0x1.478000p-3f, 0x1.a07d3ap-15f),
+    (float2)(0x1.4c0000p-3f, 0x1.9601fap-13f),
+    (float2)(0x1.510000p-3f, 0x1.532214p-14f),
+    (float2)(0x1.558000p-3f, 0x1.a31462p-13f),
+    (float2)(0x1.5a8000p-3f, 0x1.05a584p-14f),
+    (float2)(0x1.5f0000p-3f, 0x1.4911c8p-13f),
+    (float2)(0x1.638000p-3f, 0x1.f615fep-13f),
+    (float2)(0x1.688000p-3f, 0x1.1445b0p-14f),
+    (float2)(0x1.6d0000p-3f, 0x1.057abcp-13f),
+    (float2)(0x1.718000p-3f, 0x1.685f0ap-13f),
+    (float2)(0x1.760000p-3f, 0x1.b31022p-13f),
+    (float2)(0x1.7a8000p-3f, 0x1.e5cd62p-13f),
+    (float2)(0x1.7f8000p-3f, 0x1.aa6ca8p-22f),
+    (float2)(0x1.840000p-3f, 0x1.1944bcp-19f),
+    (float2)(0x1.880000p-3f, 0x1.f0b980p-13f),
+    (float2)(0x1.8c8000p-3f, 0x1.c60e20p-13f),
+    (float2)(0x1.910000p-3f, 0x1.849daep-13f),
+    (float2)(0x1.958000p-3f, 0x1.2ca202p-13f),
+    (float2)(0x1.9a0000p-3f, 0x1.7ca842p-14f),
+    (float2)(0x1.9e8000p-3f, 0x1.cf6180p-16f),
+    (float2)(0x1.a28000p-3f, 0x1.9fa186p-13f),
+    (float2)(0x1.a70000p-3f, 0x1.df5554p-14f),
+    (float2)(0x1.ab8000p-3f, 0x1.51eaccp-16f),
+    (float2)(0x1.af8000p-3f, 0x1.4f8e88p-13f),
+    (float2)(0x1.b40000p-3f, 0x1.7f49aap-15f),
+    (float2)(0x1.b80000p-3f, 0x1.5b3c72p-13f),
+    (float2)(0x1.bc8000p-3f, 0x1.07fd5cp-15f),
+    (float2)(0x1.c08000p-3f, 0x1.144d18p-13f),
+    (float2)(0x1.c48000p-3f, 0x1.d25700p-13f),
+    (float2)(0x1.c90000p-3f, 0x1.f1369ep-15f),
+    (float2)(0x1.cd0000p-3f, 0x1.1260fap-13f),
+    (float2)(0x1.d10000p-3f, 0x1.94c038p-13f),
+    (float2)(0x1.d58000p-3f, 0x1.ccfdb8p-20f),
+    (float2)(0x1.d98000p-3f, 0x1.7c70dap-15f),
+    (float2)(0x1.dd8000p-3f, 0x1.4ee87ap-14f),
+    (float2)(0x1.e18000p-3f, 0x1.b99d86p-14f),
+    (float2)(0x1.e58000p-3f, 0x1.feafc0p-14f),
+    (float2)(0x1.e98000p-3f, 0x1.0f3b16p-13f),
+    (float2)(0x1.ed8000p-3f, 0x1.0ca34cp-13f),
+    (float2)(0x1.f18000p-3f, 0x1.ef75b2p-14f),
+    (float2)(0x1.f58000p-3f, 0x1.a15704p-14f),
+    (float2)(0x1.f98000p-3f, 0x1.2f3cfap-14f),
+    (float2)(0x1.fd8000p-3f, 0x1.32f1dcp-15f),
+    (float2)(0x1.008000p-2f, 0x1.f02d90p-13f),
+    (float2)(0x1.028000p-2f, 0x1.821964p-13f),
+    (float2)(0x1.048000p-2f, 0x1.02a708p-13f),
+    (float2)(0x1.068000p-2f, 0x1.c7f450p-15f),
+    (float2)(0x1.080000p-2f, 0x1.e820cap-12f),
+    (float2)(0x1.0a0000p-2f, 0x1.8ecd14p-12f),
+    (float2)(0x1.0c0000p-2f, 0x1.2d15f4p-12f),
+    (float2)(0x1.0e0000p-2f, 0x1.861b72p-13f),
+    (float2)(0x1.100000p-2f, 0x1.4319e6p-14f),
+    (float2)(0x1.118000p-2f, 0x1.d6520ep-12f),
+    (float2)(0x1.138000p-2f, 0x1.53c218p-12f),
+    (float2)(0x1.158000p-2f, 0x1.925000p-13f),
+    (float2)(0x1.178000p-2f, 0x1.b4a7a2p-15f),
+    (float2)(0x1.190000p-2f, 0x1.9c19eep-12f),
+    (float2)(0x1.1b0000p-2f, 0x1.f38f64p-13f),
+    (float2)(0x1.1d0000p-2f, 0x1.3ebb32p-14f),
+    (float2)(0x1.1e8000p-2f, 0x1.9ddf96p-12f),
+    (float2)(0x1.208000p-2f, 0x1.c8d472p-13f),
+    (float2)(0x1.228000p-2f, 0x1.1af536p-15f),
+    (float2)(0x1.240000p-2f, 0x1.5acca0p-12f),
+    (float2)(0x1.260000p-2f, 0x1.158770p-13f),
+    (float2)(0x1.278000p-2f, 0x1.b35350p-12f),
+    (float2)(0x1.298000p-2f, 0x1.a91532p-13f),
+    (float2)(0x1.2b0000p-2f, 0x1.ee7896p-12f),
+    (float2)(0x1.2d0000p-2f, 0x1.012c1cp-12f),
+    (float2)(0x1.2f0000p-2f, 0x1.967ab4p-17f),
+    (float2)(0x1.308000p-2f, 0x1.111e3cp-12f),
+    (float2)(0x1.328000p-2f, 0x1.cf340ep-17f),
+    (float2)(0x1.340000p-2f, 0x1.04d426p-12f), // index 128: identical to LOG10_2_HEAD / LOG10_2_TAIL in log_base.h
+};
+
 DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
     224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
     169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
@@ -880,6 +1012,7 @@ TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
 TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
 TABLE_FUNCTION(float2, LOG_INV_TBL_EP, log_inv_tbl_ep);
 TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
+TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
 
 uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
     return *(__constant uint4 *)(PIBITS_TBL + idx);
diff --git a/libclc/generic/lib/math/tables.h b/libclc/generic/lib/math/tables.h
index 8e1d773..8045242 100644 (file)
@@ -42,6 +42,7 @@ TABLE_FUNCTION_DECL(float2, loge_tbl);
 TABLE_FUNCTION_DECL(float, log_inv_tbl);
 TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep);
 TABLE_FUNCTION_DECL(float2, log2_tbl);
+TABLE_FUNCTION_DECL(float2, log10_tbl);
 TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
 TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
 TABLE_FUNCTION_DECL(float2, cbrt_tbl);