Optimized mp multiplication

author Siddhesh Poyarekar <siddhesh@redhat.com>

Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)

committer Siddhesh Poyarekar <siddhesh@redhat.com>

Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)
author Siddhesh Poyarekar <siddhesh@redhat.com>
Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)
committer Siddhesh Poyarekar <siddhesh@redhat.com>
Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)
diff --git a/ChangeLog b/ChangeLog

index 9b224a4..743dfaf 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
  2013-02-13  Siddhesh Poyarekar  <siddhesh@redhat.com>
  
+       * sysdeps/ieee754/dbl-64/mpa.c (__mul): Don't bother with zero
+       values in the mantissa.
+
         * sysdeps/ieee754/dbl-64/mpa.c (add_magnitudes): Use ZK to
         minimize writes to Z.
         (sub_magnitudes): Simplify code a bit.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c

index 005aa0c..5b50b0d 100644 (file)
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -610,7 +610,7 @@ void
  SECTION
  __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
  {
-  int i, j, k, k2;
+  int i, j, k, ip, ip2;
    double u, zk;
  
    /* Is z=0?  */
@@ -620,11 +620,51 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
        return;
      }
  
-  /* Multiply, add and carry.  */
-  k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
-  zk = Z[k2] = ZERO;
+  /* We need not iterate through all X's and Y's since it's pointless to
+     multiply zeroes.  Here, both are zero...  */
+  for (ip2 = p; ip2 > 0; ip2--)
+    if (X[ip2] != ZERO || Y[ip2] != ZERO)
+      break;
  
-  for (k = k2; k > p; k--)
+  /* ... and here, at least one of them is still zero.  */
+  for (ip = ip2; ip > 0; ip--)
+    if (X[ip] * Y[ip] != ZERO)
+      break;
+
+  /* The product looks like this for p = 3 (as an example):
+
+
+                               a1    a2    a3
+                x              b1    b2    b3
+                -----------------------------
+                            a1*b3 a2*b3 a3*b3
+                      a1*b2 a2*b2 a3*b2
+                a1*b1 a2*b1 a3*b1
+
+     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
+     for P >= 3.  We compute the above digits in two parts; the last P-1
+     digits and then the first P digits.  The last P-1 digits are a sum of
+     products of the input digits from P to P-k where K is 0 for the least
+     significant digit and increases as we go towards the left.  The product
+     term is of the form X[k]*X[P-k] as can be seen in the above example.
+
+     The first P digits are also a sum of products with the same product term,
+     except that the sum is from 1 to k.  This is also evident from the above
+     example.
+
+     Another thing that becomes evident is that only the most significant
+     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
+     'internal precision' of the input numbers, i.e. digits after ip and ip2
+     are all ZERO.  */
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > ip + ip2 + 1)
+    Z[k--] = ZERO;
+
+  zk = Z[k] = ZERO;
+
+  while (k > p)
      {
        for (i = k - p, j = p; i < p + 1; i++, j--)
         zk += X[i] * Y[j];
@@ -632,10 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
        u = (zk + CUTTER) - CUTTER;
        if (u > zk)
         u -= RADIX;
-      Z[k] = zk - u;
+      Z[k--] = zk - u;
        zk = u * RADIXI;
      }
  
+  /* The real deal.  */
    while (k > 1)
      {
        for (i = 1, j = k - 1; i < k; i++, j--)
@@ -644,9 +685,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
        u = (zk + CUTTER) - CUTTER;
        if (u > zk)
         u -= RADIX;
-      Z[k] = zk - u;
+      Z[k--] = zk - u;
        zk = u * RADIXI;
-      k--;
      }
    Z[k] = zk;
author	Siddhesh Poyarekar <siddhesh@redhat.com>
	Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)
committer	Siddhesh Poyarekar <siddhesh@redhat.com>
	Wed, 13 Feb 2013 08:46:23 +0000 (14:16 +0530)
ChangeLog		patch \| blob \| history
sysdeps/ieee754/dbl-64/mpa.c		patch \| blob \| history