+++ /dev/null
-/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-/* If you consider tuning this algorithm, you should consult first:
- Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
- Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
-
-#include <limits.h>
-#include <stdlib.h>
-#include <string.h>
-#include <isl_qsort.h>
-
-/* Byte-wise swap two items of size SIZE. */
-#define SWAP(a, b, size) \
- do \
- { \
- register size_t __size = (size); \
- register char *__a = (a), *__b = (b); \
- do \
- { \
- char __tmp = *__a; \
- *__a++ = *__b; \
- *__b++ = __tmp; \
- } while (--__size > 0); \
- } while (0)
-
-/* Discontinue quicksort algorithm when partition gets below this size.
- This particular magic number was chosen to work best on a Sun 4/260. */
-#define MAX_THRESH 4
-
-/* Stack node declarations used to store unfulfilled partition obligations. */
-typedef struct
- {
- char *lo;
- char *hi;
- } stack_node;
-
-/* The next 4 #defines implement a very fast in-line stack abstraction. */
-/* The stack needs log (total_elements) entries (we could even subtract
- log(MAX_THRESH)). Since total_elements has type size_t, we get as
- upper bound for log (total_elements):
- bits per byte (CHAR_BIT) * sizeof(size_t). */
-#define STACK_SIZE (CHAR_BIT * sizeof(size_t))
-#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
-#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
-#define STACK_NOT_EMPTY (stack < top)
-
-
-/* Order size using quicksort. This implementation incorporates
- four optimizations discussed in Sedgewick:
-
- 1. Non-recursive, using an explicit stack of pointer that store the
- next array partition to sort. To save time, this maximum amount
- of space required to store an array of SIZE_MAX is allocated on the
- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
- Pretty cheap, actually.
-
- 2. Chose the pivot element using a median-of-three decision tree.
- This reduces the probability of selecting a bad pivot value and
- eliminates certain extraneous comparisons.
-
- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
- insertion sort to order the MAX_THRESH items within each partition.
- This is a big win, since insertion sort is faster for small, mostly
- sorted array segments.
-
- 4. The larger of the two sub-partitions is always pushed onto the
- stack first, with the algorithm then concentrating on the
- smaller partition. This *guarantees* no more than log (total_elems)
- stack size is needed (actually O(1) in this case)! */
-
-void isl_quicksort (void *const pbase, size_t total_elems, size_t size,
- int (*cmp)(const void *, const void *, void *arg), void *arg)
-{
- register char *base_ptr = (char *) pbase;
-
- const size_t max_thresh = MAX_THRESH * size;
-
- if (total_elems == 0)
- /* Avoid lossage with unsigned arithmetic below. */
- return;
-
- if (total_elems > MAX_THRESH)
- {
- char *lo = base_ptr;
- char *hi = &lo[size * (total_elems - 1)];
- stack_node stack[STACK_SIZE];
- stack_node *top = stack;
-
- PUSH (NULL, NULL);
-
- while (STACK_NOT_EMPTY)
- {
- char *left_ptr;
- char *right_ptr;
-
- /* Select median value from among LO, MID, and HI. Rearrange
- LO and HI so the three values are sorted. This lowers the
- probability of picking a pathological pivot value and
- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
- the while loops. */
-
- char *mid = lo + size * ((hi - lo) / size >> 1);
-
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- SWAP (mid, lo, size);
- if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
- SWAP (mid, hi, size);
- else
- goto jump_over;
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
- SWAP (mid, lo, size);
- jump_over:;
-
- left_ptr = lo + size;
- right_ptr = hi - size;
-
- /* Here's the famous ``collapse the walls'' section of quicksort.
- Gotta like those tight inner loops! They are the main reason
- that this algorithm runs much faster than others. */
- do
- {
- while ((*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
- left_ptr += size;
-
- while ((*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
- right_ptr -= size;
-
- if (left_ptr < right_ptr)
- {
- SWAP (left_ptr, right_ptr, size);
- if (mid == left_ptr)
- mid = right_ptr;
- else if (mid == right_ptr)
- mid = left_ptr;
- left_ptr += size;
- right_ptr -= size;
- }
- else if (left_ptr == right_ptr)
- {
- left_ptr += size;
- right_ptr -= size;
- break;
- }
- }
- while (left_ptr <= right_ptr);
-
- /* Set up pointers for next iteration. First determine whether
- left and right partitions are below the threshold size. If so,
- ignore one or both. Otherwise, push the larger partition's
- bounds on the stack and continue sorting the smaller one. */
-
- if ((size_t) (right_ptr - lo) <= max_thresh)
- {
- if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore both small partitions. */
- POP (lo, hi);
- else
- /* Ignore small left partition. */
- lo = left_ptr;
- }
- else if ((size_t) (hi - left_ptr) <= max_thresh)
- /* Ignore small right partition. */
- hi = right_ptr;
- else if ((right_ptr - lo) > (hi - left_ptr))
- {
- /* Push larger left partition indices. */
- PUSH (lo, right_ptr);
- lo = left_ptr;
- }
- else
- {
- /* Push larger right partition indices. */
- PUSH (left_ptr, hi);
- hi = right_ptr;
- }
- }
- }
-
- /* Once the BASE_PTR array is partially sorted by quicksort the rest
- is completely sorted using insertion sort, since this is efficient
- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
- of the array to sort, and END_PTR points at the very last element in
- the array (*not* one beyond it!). */
-
-#define min(x, y) ((x) < (y) ? (x) : (y))
-
- {
- char *const end_ptr = &base_ptr[size * (total_elems - 1)];
- char *tmp_ptr = base_ptr;
- char *thresh = min(end_ptr, base_ptr + max_thresh);
- register char *run_ptr;
-
- /* Find smallest element in first threshold and place it at the
- array's beginning. This is the smallest array element,
- and the operation speeds up insertion sort's inner loop. */
-
- for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
- if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
- tmp_ptr = run_ptr;
-
- if (tmp_ptr != base_ptr)
- SWAP (tmp_ptr, base_ptr, size);
-
- /* Insertion sort, running from left-hand-side up to right-hand-side. */
-
- run_ptr = base_ptr + size;
- while ((run_ptr += size) <= end_ptr)
- {
- tmp_ptr = run_ptr - size;
- while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, arg) < 0)
- tmp_ptr -= size;
-
- tmp_ptr += size;
- if (tmp_ptr != run_ptr)
- {
- char *trav;
-
- trav = run_ptr + size;
- while (--trav >= run_ptr)
- {
- char c = *trav;
- char *hi, *lo;
-
- for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
- *hi = *lo;
- *hi = c;
- }
- }
- }
- }
-}
--- /dev/null
+/*
+ * The code of this file was taken from http://jeffreystedfast.blogspot.be,
+ * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
+ * The MIT license text is as follows:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <isl_sort.h>
+
+#define MID(lo, hi) (lo + ((hi - lo) >> 1))
+
+/* The code here is an optimized merge sort. Starting from a generic merge sort
+ * the following optimizations were applied:
+ *
+ * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
+ * every element into a temporary buffer, blocks of elements are copied
+ * at a time.
+ *
+ * o To reduce the number of memcpy() calls further, copying leading
+ * and trailing elements into our temporary buffer is avoided, in case it is
+ * not necessary to merge them.
+ *
+ * A further optimization could be to specialize memcpy calls based on the
+ * size of the types we compare. For now, this code does not include the
+ * relevant optimization, as clang e.g. inlines a very efficient memcpy()
+ * implementation. It is not clear, that the specialized version as provided in
+ * the blog post, is really superior to the one that will be inlined by
+ * default. So we decided to keep the code simple until this optimization was
+ * proven to be beneficial.
+ */
+
+static void
+msort (void *array, void *buf, size_t low, size_t high, size_t size,
+ int (* compare) (const void *, const void *, void *), void *arg)
+{
+ char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
+ size_t copied = 0;
+ size_t mid;
+
+ mid = MID (low, high);
+
+ if (mid + 1 < high)
+ msort (array, buf, mid + 1, high, size, compare, arg);
+
+ if (mid > low)
+ msort (array, buf, low, mid, size, compare, arg);
+
+ ah = ((char *) array) + ((high + 1) * size);
+ am = ((char *) array) + ((mid + 1) * size);
+ a1 = al = ((char *) array) + (low * size);
+
+ b = (char *) buf;
+ lo = al;
+ hi = am;
+
+ do {
+ ls = lo;
+ hs = hi;
+
+ if (lo > al || hi > am) {
+ /* our last loop already compared lo & hi and found lo <= hi */
+ lo += size;
+ }
+
+ while (lo < am && compare (lo, hi, arg) <= 0)
+ lo += size;
+
+ if (lo < am) {
+ if (copied == 0) {
+ /* avoid copying the leading items */
+ a1 = lo;
+ ls = lo;
+ }
+
+ /* our last compare tells us hi < lo */
+ hi += size;
+
+ while (hi < ah && compare (hi, lo, arg) < 0)
+ hi += size;
+
+ if (lo > ls) {
+ memcpy (b, ls, lo - ls);
+ copied += (lo - ls);
+ b += (lo - ls);
+ }
+
+ memcpy (b, hs, hi - hs);
+ copied += (hi - hs);
+ b += (hi - hs);
+ } else if (copied) {
+ memcpy (b, ls, lo - ls);
+ copied += (lo - ls);
+ b += (lo - ls);
+
+ /* copy everything we needed to re-order back into array */
+ memcpy (a1, buf, copied);
+ return;
+ } else {
+ /* everything already in order */
+ return;
+ }
+ } while (hi < ah);
+
+ if (lo < am) {
+ memcpy (b, lo, am - lo);
+ copied += (am - lo);
+ }
+
+ memcpy (a1, buf, copied);
+}
+
+static int
+MergeSort (void *base, size_t nmemb, size_t size,
+ int (* compare) (const void *, const void *, void *), void *arg)
+{
+ void *tmp;
+
+ if (nmemb < 2)
+ return 0;
+
+ if (!(tmp = malloc (nmemb * size))) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ msort (base, tmp, 0, nmemb - 1, size, compare, arg);
+
+ free (tmp);
+
+ return 0;
+}
+
+int isl_sort(void *const pbase, size_t total_elems, size_t size,
+ int (*cmp)(const void *, const void *, void *arg), void *arg)
+{
+ return MergeSort (pbase, total_elems, size, cmp, arg);
+}