isl_sort.c

   1 /*
   2  * The code of this file was taken from http://jeffreystedfast.blogspot.be,
   3  * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
   4  * The MIT license text is as follows:
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  */
  24
  25 #include <errno.h>
  26 #include <string.h>
  27 #include <stdlib.h>
  28 #include <isl_sort.h>
  29
  30 #define MID(lo, hi) (lo + ((hi - lo) >> 1))
  31
  32 /* The code here is an optimized merge sort. Starting from a generic merge sort
  33  * the following optimizations were applied:
  34  *
  35  * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
  36  *   every element into a temporary buffer, blocks of elements are copied
  37  *   at a time.
  38  *
  39  * o To reduce the number of memcpy() calls further, copying leading
  40  *   and trailing elements into our temporary buffer is avoided, in case it is
  41  *   not necessary to merge them.
  42  *
  43  * A further optimization could be to specialize memcpy calls based on the
  44  * size of the types we compare. For now, this code does not include the
  45  * relevant optimization, as clang e.g. inlines a very efficient memcpy()
  46  * implementation. It is not clear, that the specialized version as provided in
  47  * the blog post, is really superior to the one that will be inlined by
  48  * default. So we decided to keep the code simple until this optimization was
  49  * proven to be beneficial.
  50  */
  51
  52 static void
  53 msort (void *array, void *buf, size_t low, size_t high, size_t size,
  54        int (* compare) (const void *, const void *, void *), void *arg)
  55 {
  56     char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
  57     size_t copied = 0;
  58     size_t mid;
  59
  60     mid = MID (low, high);
  61
  62     if (mid + 1 < high)
  63         msort (array, buf, mid + 1, high, size, compare, arg);
  64
  65     if (mid > low)
  66         msort (array, buf, low, mid, size, compare, arg);
  67
  68     ah = ((char *) array) + ((high + 1) * size);
  69     am = ((char *) array) + ((mid + 1) * size);
  70     a1 = al = ((char *) array) + (low * size);
  71
  72     b = (char *) buf;
  73     lo = al;
  74     hi = am;
  75
  76     do {
  77         ls = lo;
  78         hs = hi;
  79
  80         if (lo > al || hi > am) {
  81             /* our last loop already compared lo & hi and found lo <= hi */
  82             lo += size;
  83         }
  84
  85         while (lo < am && compare (lo, hi, arg) <= 0)
  86             lo += size;
  87
  88         if (lo < am) {
  89             if (copied == 0) {
  90                 /* avoid copying the leading items */
  91                 a1 = lo;
  92                 ls = lo;
  93             }
  94
  95             /* our last compare tells us hi < lo */
  96             hi += size;
  97
  98             while (hi < ah && compare (hi, lo, arg) < 0)
  99                 hi += size;
 100
 101             if (lo > ls) {
 102                 memcpy (b, ls, lo - ls);
 103                 copied += (lo - ls);
 104                 b += (lo - ls);
 105             }
 106
 107             memcpy (b, hs, hi - hs);
 108             copied += (hi - hs);
 109             b += (hi - hs);
 110         } else if (copied) {
 111             memcpy (b, ls, lo - ls);
 112             copied += (lo - ls);
 113             b += (lo - ls);
 114
 115             /* copy everything we needed to re-order back into array */
 116             memcpy (a1, buf, copied);
 117             return;
 118         } else {
 119             /* everything already in order */
 120             return;
 121         }
 122     } while (hi < ah);
 123
 124     if (lo < am) {
 125         memcpy (b, lo, am - lo);
 126         copied += (am - lo);
 127     }
 128
 129     memcpy (a1, buf, copied);
 130 }
 131
 132 static int
 133 MergeSort (void *base, size_t nmemb, size_t size,
 134            int (* compare) (const void *, const void *, void *), void *arg)
 135 {
 136     void *tmp;
 137
 138     if (nmemb < 2)
 139         return 0;
 140
 141     if (!(tmp = malloc (nmemb * size))) {
 142         errno = ENOMEM;
 143         return -1;
 144     }
 145
 146     msort (base, tmp, 0, nmemb - 1, size, compare, arg);
 147
 148     free (tmp);
 149
 150     return 0;
 151 }
 152
 153 int isl_sort(void *const pbase, size_t total_elems, size_t size,
 154         int (*cmp)(const void *, const void *, void *arg), void *arg)
 155 {
 156     return MergeSort (pbase, total_elems, size, cmp, arg);
 157 }