From: adam Date: Wed, 9 Jan 2013 06:43:13 +0000 (+0700) Subject: v1.0.44 #31 X-Git-Tag: v1.2.12~524 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=eef1536428ff7dee8c680903e7952f1f90ebbd96;p=platform%2Fupstream%2Fejdb.git v1.0.44 #31 --- diff --git a/.idea/jsLibraryMappings.xml b/.idea/jsLibraryMappings.xml index f6e673a..4467249 100644 --- a/.idea/jsLibraryMappings.xml +++ b/.idea/jsLibraryMappings.xml @@ -1,3 +1,8 @@ - + + + + + + diff --git a/Changelog b/Changelog index 2106470..42ed1d0 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,7 @@ +2013-01-09 Anton Adamansky. + * Fixed crash on sorting (ticket #31) + - Release 1.0.44 + 2013-01-08 Anton Adamansky. * Fixed #28 * Deprecated node.js API removeCollection() use dropCollection() instead. diff --git a/node/nbproject/configurations.xml b/node/nbproject/configurations.xml index 17ebec1..a5e8140 100644 --- a/node/nbproject/configurations.xml +++ b/node/nbproject/configurations.xml @@ -3284,6 +3284,7 @@ nodejs/src nodejs/deps/v8/src + nodejs/deps/uv/include ../tcejdb diff --git a/package.json b/package.json index 1adf897..1bd6f52 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name" : "ejdb", - "version" : "1.0.43", + "version" : "1.0.44", "main" : "node/ejdb.js", "homepage" : "http://ejdb.org", "description" : "EJDB - Embedded JSON Database engine", diff --git a/tcejdb/Makefile.in b/tcejdb/Makefile.in index 154f9be..20aed52 100644 --- a/tcejdb/Makefile.in +++ b/tcejdb/Makefile.in @@ -829,6 +829,8 @@ ejdb.o : myconf.h ejdb.h ejdb_private.h ejdbutl.h bson.o : myconf.h bson.h +ejdbutl.o : ejdbutl.h + utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -o utf8proc.o utf8proc.c diff --git a/tcejdb/configure b/tcejdb/configure index dcda71e..ebcb713 100755 --- a/tcejdb/configure +++ b/tcejdb/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for tcejdb 1.0.43. +# Generated by GNU Autoconf 2.69 for tcejdb 1.0.44. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='tcejdb' PACKAGE_TARNAME='tcejdb' -PACKAGE_VERSION='1.0.43' -PACKAGE_STRING='tcejdb 1.0.43' +PACKAGE_VERSION='1.0.44' +PACKAGE_STRING='tcejdb 1.0.44' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1257,7 +1257,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures tcejdb 1.0.43 to adapt to many kinds of systems. +\`configure' configures tcejdb 1.0.44 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1318,7 +1318,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of tcejdb 1.0.43:";; + short | recursive ) echo "Configuration of tcejdb 1.0.44:";; esac cat <<\_ACEOF @@ -1424,7 +1424,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -tcejdb configure 1.0.43 +tcejdb configure 1.0.44 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1722,7 +1722,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by tcejdb $as_me 1.0.43, which was +It was created by tcejdb $as_me 1.0.44, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2080,7 +2080,7 @@ MYFORMATVER="1.0" MYHEADERFILES="tcutil.h tchdb.h tcbdb.h tcfdb.h tctdb.h tcadb.h ejdb.h ejdb_private.h bson.h myconf.h" MYLIBRARYFILES="libtcejdb.a" MYLIBOBJFILES="tcutil.o tchdb.o tcbdb.o tcfdb.o tctdb.o tcadb.o myconf.o md5.o ejdb.o \ -bson.o timsort.o numbers.o encoding.o utf8proc.o" +bson.o numbers.o encoding.o utf8proc.o ejdbutl.o" MYCOMMANDFILES="tcutest tcumttest tcucodec tchtest tchmttest tchmgr" MYCOMMANDFILES="$MYCOMMANDFILES tcbtest tcbmttest tcbmgr tcftest tcfmttest tcfmgr" MYCOMMANDFILES="$MYCOMMANDFILES tcttest tctmttest tctmgr tcatest tcamttest tcamgr" @@ -4870,7 +4870,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by tcejdb $as_me 1.0.43, which was +This file was extended by tcejdb $as_me 1.0.44, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -4923,7 +4923,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -tcejdb config.status 1.0.43 +tcejdb config.status 1.0.44 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/tcejdb/configure.ac b/tcejdb/configure.ac index 02ac3c0..0650566 100644 --- a/tcejdb/configure.ac +++ b/tcejdb/configure.ac @@ -11,7 +11,7 @@ test -n "$CPPFLAGS" && MYCPPFLAGS="$CPPFLAGS $MYCPPFLAGS" test -n "$LDFLAGS" && MYLDFLAGS="$LDFLAGS $MYLDFLAGS" # Package name -AC_INIT(tcejdb, 1.0.43) +AC_INIT(tcejdb, 1.0.44) # Package information MYLIBVER=9 @@ -22,7 +22,7 @@ MYFORMATVER="1.0" MYHEADERFILES="tcutil.h tchdb.h tcbdb.h tcfdb.h tctdb.h tcadb.h ejdb.h ejdb_private.h bson.h myconf.h" MYLIBRARYFILES="libtcejdb.a" MYLIBOBJFILES="tcutil.o tchdb.o tcbdb.o tcfdb.o tctdb.o tcadb.o myconf.o md5.o ejdb.o \ -bson.o timsort.o numbers.o encoding.o utf8proc.o" +bson.o numbers.o encoding.o utf8proc.o ejdbutl.o" MYCOMMANDFILES="tcutest tcumttest tcucodec tchtest tchmttest tchmgr" MYCOMMANDFILES="$MYCOMMANDFILES tcbtest tcbmttest tcbmgr tcftest tcfmttest tcfmgr" MYCOMMANDFILES="$MYCOMMANDFILES tcttest tctmttest tctmgr tcatest tcamttest tcamgr" diff --git a/tcejdb/ejdb.c b/tcejdb/ejdb.c index c6ebcbe..7f8b02f 100644 --- a/tcejdb/ejdb.c +++ b/tcejdb/ejdb.c @@ -2511,9 +2511,7 @@ sorting: /* Sorting resultset */ _EJBSORTCTX sctx; //sorting context sctx.ofs = ofs; sctx.ofsz = ofsz; - if (ejdbtimsortlist(res, _ejdbsoncmp, &sctx)) { - _ejdbsetecode(jcoll->jb, JBEQRSSORTING, __FILE__, __LINE__, __func__); - } + ejdbqsortlist(res, _ejdbsoncmp, &sctx); finish: //check $upsert operation diff --git a/tcejdb/ejdbutl.c b/tcejdb/ejdbutl.c new file mode 100644 index 0000000..77247e5 --- /dev/null +++ b/tcejdb/ejdbutl.c @@ -0,0 +1,283 @@ + + +//////////////////////////////////////////////////////////////////////////// +// MODIFIED LIBC QSORT // +// Comparsion function have addition opaque param // +//////////////////////////////////////////////////////////////////////////// + + +/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* If you consider tuning this algorithm, you should consult first: + Engineering a sort function; Jon Bentley and M. Douglas McIlroy; + Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ + +#include +#include +#include + +/* Byte-wise swap two items of size SIZE. */ +#define SWAP(a, b, size) \ + do \ + { \ + register size_t __size = (size); \ + register char *__a = (a), *__b = (b); \ + do \ + { \ + char __tmp = *__a; \ + *__a++ = *__b; \ + *__b++ = __tmp; \ + } while (--__size > 0); \ + } while (0) + +/* Discontinue quicksort algorithm when partition gets below this size. + This particular magic number was chosen to work best on a Sun 4/260. */ +#define MAX_THRESH 4 + +/* Stack node declarations used to store unfulfilled partition obligations. */ +typedef struct + { + char *lo; + char *hi; + } stack_node; + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH)). Since total_elements has type size_t, we get as + upper bound for log (total_elements): + bits per byte (CHAR_BIT) * sizeof(size_t). */ +#define STACK_SIZE (CHAR_BIT * sizeof(size_t)) +#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) +#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) +#define STACK_NOT_EMPTY (stack < top) + + +typedef int (*EJCMPF) (const void *, const void *, void *op); + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving + insertion sort to order the MAX_THRESH items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + + +static void _quicksort (void *const pbase, size_t total_elems, size_t size, EJCMPF cmp, void *op) +{ + register char *base_ptr = (char *) pbase; + const size_t max_thresh = MAX_THRESH * size; + + if (total_elems == 0) + /* Avoid lossage with unsigned arithmetic below. */ + return; + + if (total_elems > MAX_THRESH) + { + char *lo = base_ptr; + char *hi = &lo[size * (total_elems - 1)]; + stack_node stack[STACK_SIZE]; + stack_node *top = stack; + + PUSH (NULL, NULL); + + while (STACK_NOT_EMPTY) + { + char *left_ptr; + char *right_ptr; + + /* Select median value from among LO, MID, and HI. Rearrange + LO and HI so the three values are sorted. This lowers the + probability of picking a pathological pivot value and + skips a comparison for both the LEFT_PTR and RIGHT_PTR in + the while loops. */ + + char *mid = lo + size * ((hi - lo) / size >> 1); + + if ((*cmp) ((void *) mid, (void *) lo, (void *) op) < 0) + SWAP (mid, lo, size); + if ((*cmp) ((void *) hi, (void *) mid, (void *) op) < 0) + SWAP (mid, hi, size); + else + goto jump_over; + if ((*cmp) ((void *) mid, (void *) lo, (void *) op) < 0) + SWAP (mid, lo, size); + jump_over:; + + left_ptr = lo + size; + right_ptr = hi - size; + + /* Here's the famous ``collapse the walls'' section of quicksort. + Gotta like those tight inner loops! They are the main reason + that this algorithm runs much faster than others. */ + do + { + while ((*cmp) ((void *) left_ptr, (void *) mid, (void *) op) < 0) + left_ptr += size; + + while ((*cmp) ((void *) mid, (void *) right_ptr, (void *) op) < 0) + right_ptr -= size; + + if (left_ptr < right_ptr) + { + SWAP (left_ptr, right_ptr, size); + if (mid == left_ptr) + mid = right_ptr; + else if (mid == right_ptr) + mid = left_ptr; + left_ptr += size; + right_ptr -= size; + } + else if (left_ptr == right_ptr) + { + left_ptr += size; + right_ptr -= size; + break; + } + } + while (left_ptr <= right_ptr); + + /* Set up pointers for next iteration. First determine whether + left and right partitions are below the threshold size. If so, + ignore one or both. Otherwise, push the larger partition's + bounds on the stack and continue sorting the smaller one. */ + + if ((size_t) (right_ptr - lo) <= max_thresh) + { + if ((size_t) (hi - left_ptr) <= max_thresh) + /* Ignore both small partitions. */ + POP (lo, hi); + else + /* Ignore small left partition. */ + lo = left_ptr; + } + else if ((size_t) (hi - left_ptr) <= max_thresh) + /* Ignore small right partition. */ + hi = right_ptr; + else if ((right_ptr - lo) > (hi - left_ptr)) + { + /* Push larger left partition indices. */ + PUSH (lo, right_ptr); + lo = left_ptr; + } + else + { + /* Push larger right partition indices. */ + PUSH (left_ptr, hi); + hi = right_ptr; + } + } + } + + /* Once the BASE_PTR array is partially sorted by quicksort the rest + is completely sorted using insertion sort, since this is efficient + for partitions below MAX_THRESH size. BASE_PTR points to the beginning + of the array to sort, and END_PTR points at the very last element in + the array (*not* one beyond it!). */ + +#define min(x, y) ((x) < (y) ? (x) : (y)) + + { + char *const end_ptr = &base_ptr[size * (total_elems - 1)]; + char *tmp_ptr = base_ptr; + char *thresh = min(end_ptr, base_ptr + max_thresh); + register char *run_ptr; + + /* Find smallest element in first threshold and place it at the + array's beginning. This is the smallest array element, + and the operation speeds up insertion sort's inner loop. */ + + for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) + if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, (void *) op) < 0) + tmp_ptr = run_ptr; + + if (tmp_ptr != base_ptr) + SWAP (tmp_ptr, base_ptr, size); + + /* Insertion sort, running from left-hand-side up to right-hand-side. */ + + run_ptr = base_ptr + size; + while ((run_ptr += size) <= end_ptr) + { + tmp_ptr = run_ptr - size; + while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, (void *) op) < 0) + tmp_ptr -= size; + + tmp_ptr += size; + if (tmp_ptr != run_ptr) + { + char *trav; + + trav = run_ptr + size; + while (--trav >= run_ptr) + { + char c = *trav; + char *hi, *lo; + + for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) + *hi = *lo; + *hi = c; + } + } + } + } +} + +/////////////////////////////// EOF LIBC QSORT //////////////////////////////////// + +#include "ejdbutl.h" + +typedef struct { + int (*compare)(const TCLISTDATUM*, const TCLISTDATUM*, void *opaque); + void *tcopaque; +} tclistdata; + +static inline int tclistcmp(const void* a, const void* b, void* o) { + tclistdata* op = o; + assert(op && op->compare); + return op->compare(a, b, op->tcopaque); +} + +void ejdbqsort(void *a, size_t nel, size_t width, int (*compare) (const void *, const void *, void *opaque), void *opaque) { + _quicksort(a, nel, width, compare, opaque); +} + +void ejdbqsortlist(TCLIST *list, int (*compare) (const TCLISTDATUM*, const TCLISTDATUM*, void *opaque), void *opaque) { + tclistdata op; + op.compare = compare; + op.tcopaque = opaque; + ejdbqsort(list->array + list->start, list->num, sizeof(list->array[0]), tclistcmp, &op); +} diff --git a/tcejdb/ejdbutl.h b/tcejdb/ejdbutl.h index 195d339..c712c31 100644 --- a/tcejdb/ejdbutl.h +++ b/tcejdb/ejdbutl.h @@ -13,61 +13,10 @@ EJDB_EXTERN_C_START -/** - * A stable, adaptive, iterative mergesort that requires far fewer than - * n lg(n) comparisons when running on partially sorted arrays, while - * offering performance comparable to a traditional mergesort when run - * on random arrays. Like all proper mergesorts, this sort is stable and - * runs O(n log n) time (worst case). In the worst case, this sort requires - * temporary storage space for n/2 object references; in the best case, - * it requires only a small constant amount of space. - * - * This implementation was adapted from Josh Bloch's Java implementation of - * Tim Peters's list sort for Python, which is described in detail here: - * - * http://svn.python.org/projects/python/trunk/Objects/listsort.txt - * - * Tim's C code may be found here: - * - * http://svn.python.org/projects/python/trunk/Objects/listobject.c - * - * Josh's (Apache 2.0 Licenced) Java code may be found here: - * - * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/main/java/util/TimSort.java?view=co - * - * The underlying techniques are described in this paper (and may have - * even earlier origins): - * - * "Optimistic Sorting and Information Theoretic Complexity" - * Peter McIlroy - * SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms), - * pp 467-474, Austin, Texas, 25-27 January 1993. - * - * While the API to this class consists solely of static methods, it is - * (privately) instantiable; a TimSort instance holds the state of an ongoing - * sort, assuming the input array is large enough to warrant the full-blown - * TimSort. Small arrays are sorted in place, using a binary insertion sort. - * - * C implementation: - * https://github.com/patperry/timsort - * - * - * @param a the array to be sorted - * @param nel the length of the array - * @param c the comparator to determine the order of the sort - * @param width the element width - * @param opaque data for the comparator function - * @param opaque data for the comparator function - * - * @author Josh Bloch - * @author Patrick O. Perry - */ -int ejdbtimsort(void *a, size_t nel, size_t width, - int (*c) (const void *, const void *, void *opaque), void *opaque); +void ejdbqsort(void *a, size_t nel, size_t width, int (*compare) (const void *, const void *, void *opaque), void *opaque); -int ejdbtimsortlist(TCLIST *list, - int (*compar) (const TCLISTDATUM*, const TCLISTDATUM*, void *opaque), void *opaque); +void ejdbqsortlist(TCLIST *list, int (*compar) (const TCLISTDATUM*, const TCLISTDATUM*, void *opaque), void *opaque); EJDB_EXTERN_C_END diff --git a/tcejdb/nbproject/configurations.xml b/tcejdb/nbproject/configurations.xml index 1be6e8f..788bb5f 100644 --- a/tcejdb/nbproject/configurations.xml +++ b/tcejdb/nbproject/configurations.xml @@ -17,8 +17,6 @@ tctest.c tdbtest.c - - @@ -42,6 +40,7 @@ ejdb.c ejdb.h ejdb_private.h + ejdbutl.c ejdbutl.h encoding.c encoding.h @@ -81,8 +80,6 @@ tcutest.c tcutil.c tcutil.h - timsort-impl.h - timsort.c utf8proc.c utf8proc.h utf8proc_data.c @@ -137,28 +134,6 @@ - - - - - - - - - - - - - - - - - - - - - - @@ -333,14 +308,6 @@ - - - - NDEBUG - _TC_APPLIBS="-L/usr/local/lib -ltcejdb -lbz2 -lz -lrt -lpthread -lm -lc " - - - diff --git a/tcejdb/tcejdb.iml b/tcejdb/tcejdb.iml index 5fa768d..39d622a 100644 --- a/tcejdb/tcejdb.iml +++ b/tcejdb/tcejdb.iml @@ -9,6 +9,7 @@ + diff --git a/tcejdb/timsort-impl.h b/tcejdb/timsort-impl.h deleted file mode 100644 index 4500fe7..0000000 --- a/tcejdb/timsort-impl.h +++ /dev/null @@ -1,798 +0,0 @@ -/* - * Copyright (C) 2011 Patrick O. Perry - * Copyright (C) 2008 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -static void NAME(binarySort) (void *a, size_t hi, size_t start, - comparator compare, void *opaque, size_t width); -static size_t NAME(countRunAndMakeAscending) (void *a, size_t hi, - comparator compare, - void *opaque, - size_t width); -static void NAME(reverseRange) (void *a, size_t hi, size_t width); -static int NAME(mergeCollapse) (struct timsort * ts, size_t width); -static int NAME(mergeForceCollapse) (struct timsort * ts, size_t width); -static int NAME(mergeAt) (struct timsort * ts, size_t i, size_t width); -static size_t NAME(gallopLeft) (void *key, void *base, size_t len, - size_t hint, comparator compare, void *opaque, size_t width); -static size_t NAME(gallopRight) (void *key, void *base, size_t len, - size_t hint, comparator compare, void *opaque, size_t width); -static int NAME(mergeLo) (struct timsort * ts, void *base1, size_t len1, - void *base2, size_t len2, size_t width); -static int NAME(mergeHi) (struct timsort * ts, void *base1, size_t len1, - void *base2, size_t len2, size_t width); - -static int NAME(timsort) (void *a, size_t nel, size_t width, - int (*c) (const void *, const void *, void *opaque), void *opaque) { - assert(a || !nel || !width); - assert(c); - - int err = SUCCESS; - - if (nel < 2 || !width) - return err; // Arrays of size 0 and 1 are always sorted - - // If array is small, do a "mini-TimSort" with no merges - if (nel < MIN_MERGE) { - size_t initRunLen = - CALL(countRunAndMakeAscending) (a, nel, c, opaque, width); - CALL(binarySort) (a, nel, initRunLen, c, opaque, width); - return err; - } - - /** - * March over the array once, left to right, finding natural runs, - * extending short natural runs to minRun elements, and merging runs - * to maintain stack invariant. - */ - struct timsort ts; - if ((err = timsort_init(&ts, a, nel, c, opaque, width))) - return err; - - size_t minRun = minRunLength(nel); - do { - // Identify next run - size_t runLen = - CALL(countRunAndMakeAscending) (a, nel, c, opaque, width); - - // If run is short, extend to min(minRun, nel) - if (runLen < minRun) { - size_t force = nel <= minRun ? nel : minRun; - CALL(binarySort) (a, force, runLen, c, opaque, width); - runLen = force; - } - // Push run onto pending-run stack, and maybe merge - pushRun(&ts, a, runLen); - if ((err = CALL(mergeCollapse) (&ts, width))) - goto out; - - // Advance to find next run - a = ELEM(a, runLen); - nel -= runLen; - } while (nel != 0); - - // Merge all remaining runs to complete sort - if ((err = CALL(mergeForceCollapse) (&ts, width))) - goto out; - - assert(ts.stackSize == 1); -out: - timsort_deinit(&ts); - return err; -} - -/** - * Sorts the specified portion of the specified array using a binary - * insertion sort. This is the best method for sorting small numbers - * of elements. It requires O(n log n) compares, but O(n^2) data - * movement (worst case). - * - * If the initial part of the specified range is already sorted, - * this method can take advantage of it: the method assumes that the - * elements from index {@code lo}, inclusive, to {@code start}, - * exclusive are already sorted. - * - * @param a the array in which a range is to be sorted - * @param hi the index after the last element in the range to be sorted - * @param start the index of the first element in the range that is - * not already known to be sorted ({@code lo <= start <= hi}) - * @param c comparator to used for the sort - */ -static void NAME(binarySort) (void *a, size_t hi, size_t start, - comparator compare, void *opaque, size_t width) { - assert(start <= hi); - - DEFINE_TEMP(pivot); - - if (start == 0) - start++; - - char *startp = ELEM(a, start); - - for (; start < hi; start++, startp = INCPTR(startp)) { - - // Set left (and right) to the index where a[start] (pivot) belongs - char *leftp = a; - size_t right = start; - - /* - * Invariants: - * pivot >= all in [0, left). - * pivot < all in [right, start). - */ - while (0 < right) { - size_t mid = right >> 1; - void *midp = ELEM(leftp, mid); - if (compare(startp, midp, opaque) < 0) { - right = mid; - } else { - leftp = INCPTR(midp); - right -= (mid + 1); - } - } - assert(0 == right); - - /* - * The invariants still hold: pivot >= all in [lo, left) and - * pivot < all in [left, start), so pivot belongs at left. Note - * that if there are elements equal to pivot, left points to the - * first slot after them -- that's why this sort is stable. - * Slide elements over to make room to make room for pivot. - */ - size_t n = startp - leftp; // The number of bytes to move - - ASSIGN(pivot, startp); - memmove(INCPTR(leftp), leftp, n); - - // a[left] = pivot; - ASSIGN(leftp, pivot); - } -} - -/** - * Returns the length of the run beginning at the specified position in - * the specified array and reverses the run if it is descending (ensuring - * that the run will always be ascending when the method returns). - * - * A run is the longest ascending sequence with: - * - * a[0] <= a[1] <= a[2] <= ... - * - * or the longest descending sequence with: - * - * a[0] > a[1] > a[2] > ... - * - * For its intended use in a stable mergesort, the strictness of the - * definition of "descending" is needed so that the call can safely - * reverse a descending sequence without violating stability. - * - * @param a the array in which a run is to be counted and possibly reversed - * @param hi index after the last element that may be contained in the run. - * It is required that {@code 0 < hi}. - * @param compare the comparator to used for the sort - * @return the length of the run beginning at the specified position in - * the specified array - */ -static size_t NAME(countRunAndMakeAscending) (void *a, size_t hi, - comparator compare, void *opaque, size_t width) { - assert(0 < hi); - size_t runHi = 1; - if (runHi == hi) - return 1; - - char *cur = INCPTR(a); - char *next = INCPTR(cur); - runHi++; - - // Find end of run, and reverse range if descending - if (compare(cur, a, opaque) < 0) { // Descending - while (runHi < hi && compare(next, cur, opaque) < 0) { - runHi++; - cur = next; - next = INCPTR(next); - } - CALL(reverseRange) (a, runHi, width); - } else { // Ascending - while (runHi < hi && compare(next, cur, opaque) >= 0) { - runHi++; - cur = next; - next = INCPTR(next); - } - } - - return runHi; -} - -/** - * Reverse the specified range of the specified array. - * - * @param a the array in which a range is to be reversed - * @param hi the index after the last element in the range to be reversed - */ -static void NAME(reverseRange) (void *a, size_t hi, size_t width) { - assert(hi > 0); - - DEFINE_TEMP(t); - - char *front = a; - char *back = ELEM(a, hi - 1); - - while (front < back) { - ASSIGN(t, front); - ASSIGN(front, back); - ASSIGN(back, t); - front = INCPTR(front); - back = DECPTR(back); - } -} - -/** - * Examines the stack of runs waiting to be merged and merges adjacent runs - * until the stack invariants are reestablished: - * - * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] - * 2. runLen[i - 2] > runLen[i - 1] - * - * This method is called each time a new run is pushed onto the stack, - * so the invariants are guaranteed to hold for i < stackSize upon - * entry to the method. - */ -static int NAME(mergeCollapse) (struct timsort * ts, size_t width) { - int err = SUCCESS; - - while (ts->stackSize > 1) { - size_t n = ts->stackSize - 2; - if (n > 0 - && ts->run[n - 1].len <= - ts->run[n].len + ts->run[n + 1].len) { - if (ts->run[n - 1].len < ts->run[n + 1].len) - n--; - err = CALL(mergeAt) (ts, n, width); - if (err) - break; - } else if (ts->run[n].len <= ts->run[n + 1].len) { - err = CALL(mergeAt) (ts, n, width); - if (err) - break; - } else { - break; // Invariant is established - } - } - - return err; -} - -/** - * Merges all runs on the stack until only one remains. This method is - * called once, to complete the sort. - */ -static int NAME(mergeForceCollapse) (struct timsort * ts, size_t width) { - int err = SUCCESS; - - while (ts->stackSize > 1) { - size_t n = ts->stackSize - 2; - if (n > 0 && ts->run[n - 1].len < ts->run[n + 1].len) - n--; - err = CALL(mergeAt) (ts, n, width); - if (err) - break; - } - - return err; -} - -/** - * Merges the two runs at stack indices i and i+1. Run i must be - * the penultimate or antepenultimate run on the stack. In other words, - * i must be equal to stackSize-2 or stackSize-3. - * - * @param i stack index of the first of the two runs to merge - */ -static int NAME(mergeAt) (struct timsort * ts, size_t i, size_t width) { - assert(ts->stackSize >= 2); - assert(i == ts->stackSize - 2 || i == ts->stackSize - 3); - - void *base1 = ts->run[i].base; - size_t len1 = ts->run[i].len; - void *base2 = ts->run[i + 1].base; - size_t len2 = ts->run[i + 1].len; - assert(len1 > 0 && len2 > 0); - assert(ELEM(base1, len1) == base2); - - /* - * Record the length of the combined runs; if i is the 3rd-last - * run now, also slide over the last run (which isn't involved - * in this merge). The current run (i+1) goes away in any case. - */ - ts->run[i].len = len1 + len2; - if (i == ts->stackSize - 3) { - ts->run[i + 1] = ts->run[i + 2]; - } - ts->stackSize--; - - /* - * Find where the first element of run2 goes in run1. Prior elements - * in run1 can be ignored (because they're already in place). - */ - size_t k = CALL(gallopRight) (base2, base1, len1, 0, ts->c, ts->opaque, width); - base1 = ELEM(base1, k); - len1 -= k; - if (len1 == 0) - return SUCCESS; - - /* - * Find where the last element of run1 goes in run2. Subsequent elements - * in run2 can be ignored (because they're already in place). - */ - len2 = - CALL(gallopLeft) (ELEM(base1, len1 - 1), base2, len2, len2 - 1, - ts->c, ts->opaque, width); - if (len2 == 0) - return SUCCESS; - - // Merge remaining runs, using tmp array with min(len1, len2) elements - if (len1 <= len2) - return CALL(mergeLo) (ts, base1, len1, base2, len2, width); - else - return CALL(mergeHi) (ts, base1, len1, base2, len2, width); -} - -/** - * Locates the position at which to insert the specified key into the - * specified sorted range; if the range contains an element equal to key, - * returns the index of the leftmost equal element. - * - * @param key the key whose insertion point to search for - * @param base the array in which to search - * @param len the length of the range; must be > 0 - * @param hint the index at which to begin the search, 0 <= hint < n. - * The closer hint is to the result, the faster this method will run. - * @param c the comparator used to order the range, and to search - * @return the int k, 0 <= k <= n such that a[b + k - 1] < key <= a[b + k], - * pretending that a[b - 1] is minus infinity and a[b + n] is infinity. - * In other words, key belongs at index b + k; or in other words, - * the first k elements of a should precede key, and the last n - k - * should follow it. - */ -static size_t NAME(gallopLeft) (void *key, void *base, size_t len, - size_t hint, comparator compare, - void *opaque, - size_t width) { - assert(len > 0 && hint < len); - char *hintp = ELEM(base, hint); - size_t lastOfs = 0; - size_t ofs = 1; - - if (compare(key, hintp, opaque) > 0) { - // Gallop right until a[hint+lastOfs] < key <= a[hint+ofs] - size_t maxOfs = len - hint; - while (ofs < maxOfs - && compare(key, ELEM(hintp, ofs), opaque) > 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; // eventually this becomes SIZE_MAX - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - lastOfs += hint + 1; // POP: we add 1 here so lastOfs stays non-negative - ofs += hint; - } else { // key <= a[hint] - // Gallop left until a[hint-ofs] < key <= a[hint-lastOfs] - const size_t maxOfs = hint + 1; - while (ofs < maxOfs - && compare(key, ELEM(hintp, -ofs), opaque) <= 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; // no need to check for overflow - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - size_t tmp = lastOfs; - lastOfs = hint + 1 - ofs; // POP: we add 1 here so lastOfs stays non-negative - ofs = hint - tmp; - } - assert(lastOfs <= ofs && ofs <= len); - - /* - * Now a[lastOfs-1] < key <= a[ofs], so key belongs somewhere - * to the right of lastOfs but no farther right than ofs. Do a binary - * search, with invariant a[lastOfs - 1] < key <= a[ofs]. - */ - // lastOfs++; POP: we added 1 above to keep lastOfs non-negative - while (lastOfs < ofs) { - //size_t m = lastOfs + ((ofs - lastOfs) >> 1); - // http://stackoverflow.com/questions/4844165/safe-integer-middle-value-formula - size_t m = (lastOfs & ofs) + ((lastOfs ^ ofs) >> 1); - - if (compare(key, ELEM(base, m), opaque) > 0) - lastOfs = m + 1; // a[m] < key - else - ofs = m; // key <= a[m] - } - assert(lastOfs == ofs); // so a[ofs - 1] < key <= a[ofs] - return ofs; -} - -/** - * Like gallopLeft, except that if the range contains an element equal to - * key, gallopRight returns the index after the rightmost equal element. - * - * @param key the key whose insertion point to search for - * @param base the array in which to search - * @param len the length of the range; must be > 0 - * @param hint the index at which to begin the search, 0 <= hint < n. - * The closer hint is to the result, the faster this method will run. - * @param c the comparator used to order the range, and to search - * @return the int k, 0 <= k <= n such that a[b + k - 1] <= key < a[b + k] - */ -static size_t NAME(gallopRight) (void *key, void *base, size_t len, - size_t hint, comparator compare, - void *opaque, - size_t width) { - assert(len > 0 && hint < len); - - char *hintp = ELEM(base, hint); - size_t ofs = 1; - size_t lastOfs = 0; - - if (compare(key, hintp, opaque) < 0) { - // Gallop left until a[hint - ofs] <= key < a[hint - lastOfs] - size_t maxOfs = hint + 1; - while (ofs < maxOfs - && compare(key, ELEM(hintp, -ofs), opaque) < 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; // no need to check for overflow - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - size_t tmp = lastOfs; - lastOfs = hint + 1 - ofs; - ofs = hint - tmp; - } else { // a[hint] <= key - // Gallop right until a[hint + lastOfs] <= key < a[hint + ofs] - size_t maxOfs = len - hint; - while (ofs < maxOfs - && compare(key, ELEM(hintp, ofs), opaque) >= 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; // no need to check for overflow - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - lastOfs += hint + 1; - ofs += hint; - } - assert(lastOfs <= ofs && ofs <= len); - - /* - * Now a[lastOfs - 1] <= key < a[ofs], so key belongs somewhere to - * the right of lastOfs but no farther right than ofs. Do a binary - * search, with invariant a[lastOfs - 1] <= key < a[ofs]. - */ - while (lastOfs < ofs) { - // size_t m = lastOfs + ((ofs - lastOfs) >> 1); - size_t m = (lastOfs & ofs) + ((lastOfs ^ ofs) >> 1); - - if (compare(key, ELEM(base, m), opaque) < 0) - ofs = m; // key < a[m] - else - lastOfs = m + 1; // a[m] <= key - } - assert(lastOfs == ofs); // so a[ofs - 1] <= key < a[ofs] - return ofs; -} - -/** - * Merges two adjacent runs in place, in a stable fashion. The first - * element of the first run must be greater than the first element of the - * second run (a[base1] > a[base2]), and the last element of the first run - * (a[base1 + len1-1]) must be greater than all elements of the second run. - * - * For performance, this method should be called only when len1 <= len2; - * its twin, mergeHi should be called if len1 >= len2. (Either method - * may be called if len1 == len2.) - * - * @param base1 first element in first run to be merged - * @param len1 length of first run to be merged (must be > 0) - * @param base2 first element in second run to be merged - * (must be aBase + aLen) - * @param len2 length of second run to be merged (must be > 0) - */ -static int NAME(mergeLo) (struct timsort * ts, void *base1, size_t len1, - void *base2, size_t len2, size_t width) { - assert(len1 > 0 && len2 > 0 && ELEM(base1, len1) == base2); - - // Copy first run into temp array - void *tmp = ensureCapacity(ts, len1, width); - if (!tmp) - return FAILURE; - - // System.arraycopy(a, base1, tmp, 0, len1); - memcpy(tmp, base1, LEN(len1)); - - char *cursor1 = tmp; // Indexes into tmp array - char *cursor2 = base2; // Indexes int a - char *dest = base1; // Indexes int a - - // Move first element of second run and deal with degenerate cases - // a[dest++] = a[cursor2++]; - ASSIGN(dest, cursor2); - dest = INCPTR(dest); - cursor2 = INCPTR(cursor2); - - if (--len2 == 0) { - memcpy(dest, cursor1, LEN(len1)); - return SUCCESS; - } - if (len1 == 1) { - memcpy(dest, cursor2, LEN(len2)); - - // a[dest + len2] = tmp[cursor1]; // Last elt of run 1 to end of merge - ASSIGN(ELEM(dest, len2), cursor1); - return SUCCESS; - } - - comparator compare = ts->c; // Use local variable for performance - size_t minGallop = ts->minGallop; // " " " " " - - while (1) { - size_t count1 = 0; // Number of times in a row that first run won - size_t count2 = 0; // Number of times in a row that second run won - - /* - * Do the straightforward thing until (if ever) one run starts - * winning consistently. - */ - do { - assert(len1 > 1 && len2 > 0); - if (compare(cursor2, cursor1, ts->opaque) < 0) { - ASSIGN(dest, cursor2); - dest = INCPTR(dest); - cursor2 = INCPTR(cursor2); - count2++; - count1 = 0; - if (--len2 == 0) - goto outer; - if (count2 >= minGallop) - break; - } else { - ASSIGN(dest, cursor1); - dest = INCPTR(dest); - cursor1 = INCPTR(cursor1); - count1++; - count2 = 0; - if (--len1 == 1) - goto outer; - if (count1 >= minGallop) - break; - } - } while (1); // (count1 | count2) < minGallop); - - /* - * One run is winning so consistently that galloping may be a - * huge win. So try that, and continue galloping until (if ever) - * neither run appears to be winning consistently anymore. - */ - do { - assert(len1 > 1 && len2 > 0); - count1 = - CALL(gallopRight) (cursor2, cursor1, len1, 0, - compare, ts->opaque, width); - if (count1 != 0) { - memcpy(dest, cursor1, LEN(count1)); - dest = ELEM(dest, count1); - cursor1 = ELEM(cursor1, count1); - len1 -= count1; - if (len1 <= 1) // len1 == 1 || len1 == 0 - goto outer; - } - ASSIGN(dest, cursor2); - dest = INCPTR(dest); - cursor2 = INCPTR(cursor2); - if (--len2 == 0) - goto outer; - - count2 = - CALL(gallopLeft) (cursor1, cursor2, len2, 0, - compare, ts->opaque, width); - if (count2 != 0) { - memcpy(dest, cursor2, LEN(count2)); - dest = ELEM(dest, count2); - cursor2 = ELEM(cursor2, count2); - len2 -= count2; - if (len2 == 0) - goto outer; - } - ASSIGN(dest, cursor1); - dest = INCPTR(dest); - cursor1 = INCPTR(cursor1); - if (--len1 == 1) - goto outer; - if (minGallop > 0) - minGallop--; - } while (count1 >= MIN_GALLOP || count2 >= MIN_GALLOP); - minGallop += 2; // Penalize for leaving gallop mode - } // End of "outer" loop -outer: - ts->minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field - - if (len1 == 1) { - assert(len2 > 0); - memcpy(dest, cursor2, LEN(len2)); - ASSIGN(ELEM(dest, len2), cursor1); // Last elt of run 1 to end of merge - - } else if (len1 == 0) { - errno = EINVAL; // Comparison method violates its general contract - return FAILURE; - } else { - assert(len2 == 0); - assert(len1 > 1); - memcpy(dest, cursor1, LEN(len1)); - } - return SUCCESS; -} - -/** - * Like mergeLo, except that this method should be called only if - * len1 >= len2; mergeLo should be called if len1 <= len2. (Either method - * may be called if len1 == len2.) - * - * @param base1 first element in first run to be merged - * @param len1 length of first run to be merged (must be > 0) - * @param base2 first element in second run to be merged - * (must be aBase + aLen) - * @param len2 length of second run to be merged (must be > 0) - */ -static int NAME(mergeHi) (struct timsort *ts, void *base1, size_t len1, - void *base2, size_t len2, size_t width) { - assert(len1 > 0 && len2 > 0 && ELEM(base1, len1) == base2); - - // Copy second run into temp array - void *tmp = ensureCapacity(ts, len2, width); - if (!tmp) - return FAILURE; - - memcpy(tmp, base2, LEN(len2)); - - char *cursor1 = ELEM(base1, len1 - 1); // Indexes into a - char *cursor2 = ELEM(tmp, len2 - 1); // Indexes into tmp array - char *dest = ELEM(base2, len2 - 1); // Indexes into a - - // Move last element of first run and deal with degenerate cases - // a[dest--] = a[cursor1--]; - ASSIGN(dest, cursor1); - dest = DECPTR(dest); - cursor1 = DECPTR(cursor1); - if (--len1 == 0) { - memcpy(ELEM(dest, -(len2 - 1)), tmp, LEN(len2)); - return SUCCESS; - } - if (len2 == 1) { - dest = ELEM(dest, -len1); - cursor1 = ELEM(cursor1, -len1); - memcpy(ELEM(dest, 1), ELEM(cursor1, 1), LEN(len1)); - // a[dest] = tmp[cursor2]; - ASSIGN(dest, cursor2); - return SUCCESS; - } - - comparator compare = ts->c; // Use local variable for performance - size_t minGallop = ts->minGallop; // " " " " " - - while (1) { - size_t count1 = 0; // Number of times in a row that first run won - size_t count2 = 0; // Number of times in a row that second run won - - /* - * Do the straightforward thing until (if ever) one run - * appears to win consistently. - */ - do { - assert(len1 > 0 && len2 > 1); - if (compare(cursor2, cursor1, ts->opaque) < 0) { - ASSIGN(dest, cursor1); - dest = DECPTR(dest); - cursor1 = DECPTR(cursor1); - count1++; - count2 = 0; - if (--len1 == 0) - goto outer; - } else { - ASSIGN(dest, cursor2); - dest = DECPTR(dest); - cursor2 = DECPTR(cursor2); - count2++; - count1 = 0; - if (--len2 == 1) - goto outer; - } - } while ((count1 | count2) < minGallop); - - /* - * One run is winning so consistently that galloping may be a - * huge win. So try that, and continue galloping until (if ever) - * neither run appears to be winning consistently anymore. - */ - do { - assert(len1 > 0 && len2 > 1); - count1 = - len1 - CALL(gallopRight) (cursor2, base1, - len1, len1 - 1, compare, ts->opaque, - width); - if (count1 != 0) { - dest = ELEM(dest, -count1); - cursor1 = ELEM(cursor1, -count1); - len1 -= count1; - memcpy(INCPTR(dest), INCPTR(cursor1), - LEN(count1)); - if (len1 == 0) - goto outer; - } - ASSIGN(dest, cursor2); - dest = DECPTR(dest); - cursor2 = DECPTR(cursor2); - if (--len2 == 1) - goto outer; - - count2 = - len2 - CALL(gallopLeft) (cursor1, tmp, len2, - len2 - 1, compare, - ts->opaque, - width); - if (count2 != 0) { - dest = ELEM(dest, -count2); - cursor2 = ELEM(cursor2, -count2); - len2 -= count2; - memcpy(INCPTR(dest), - INCPTR(cursor2), LEN(count2)); - if (len2 <= 1) // len2 == 1 || len2 == 0 - goto outer; - } - ASSIGN(dest, cursor1); - dest = DECPTR(dest); - cursor1 = DECPTR(cursor1); - if (--len1 == 0) - goto outer; - if (minGallop > 0) - minGallop--; - } while (count1 >= MIN_GALLOP || count2 >= MIN_GALLOP); - minGallop += 2; // Penalize for leaving gallop mode - } // End of "outer" loop -outer: - ts->minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field - - if (len2 == 1) { - assert(len1 > 0); - dest = ELEM(dest, -len1); - cursor1 = ELEM(cursor1, -len1); - memcpy(INCPTR(dest), INCPTR(cursor1), LEN(len1)); - // a[dest] = tmp[cursor2]; // Move first elt of run2 to front of merge - ASSIGN(dest, cursor2); - } else if (len2 == 0) { - errno = EINVAL; // Comparison method violates its general contract - return FAILURE; - } else { - assert(len1 == 0); - assert(len2 > 0); - memcpy(ELEM(dest, -(len2 - 1)), tmp, LEN(len2)); - } - - return SUCCESS; -} \ No newline at end of file diff --git a/tcejdb/timsort.c b/tcejdb/timsort.c deleted file mode 100644 index a5af467..0000000 --- a/tcejdb/timsort.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright (C) 2011 Patrick O. Perry - * Copyright (C) 2008 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include // assert -#include // EINVAL -#include // size_t, NULL -#include // malloc, free -#include // memcpy, memmove -#include "ejdbutl.h" - -/** - * This is the minimum sized sequence that will be merged. Shorter - * sequences will be lengthened by calling binarySort. If the entire - * array is less than this length, no merges will be performed. - * - * This constant should be a power of two. It was 64 in Tim Peter's C - * implementation, but 32 was empirically determined to work better in - * [Android's Java] implementation. In the unlikely event that you set - * this constant to be a number that's not a power of two, you'll need - * to change the {@link #minRunLength} computation. - * - * If you decrease this constant, you must change the stackLen - * computation in the TimSort constructor, or you risk an - * ArrayOutOfBounds exception. See listsort.txt for a discussion - * of the minimum stack length required as a function of the length - * of the array being sorted and the minimum merge sequence length. - */ -#define MIN_MERGE 32 - -/** - * When we get into galloping mode, we stay there until both runs win less - * often than MIN_GALLOP consecutive times. - */ -#define MIN_GALLOP 7 - -/** - * Maximum initial size of tmp array, which is used for merging. The array - * can grow to accommodate demand. - * - * Unlike Tim's original C version, we do not allocate this much storage - * when sorting smaller arrays. This change was required for performance. - */ -#define INITIAL_TMP_STORAGE_LENGTH 256 - -/** - * Maximum stack size. This depends on MIN_MERGE and sizeof(size_t). - */ -#define MAX_STACK 85 - -/** - * Define MALLOC_STACK if you want to allocate the run stack on the heap. - * Otherwise, 2* MAX_STACK * sizeof(size_t) ~ 1.3K gets reserved on the - * call stack. - */ -/* #undef MALLOC_STACK */ - -#define DEFINE_TEMP(temp) char temp[WIDTH] -#define ASSIGN(x, y) memcpy(x, y, WIDTH) -#define INCPTR(x) ((void *)((char *)(x) + WIDTH)) -#define DECPTR(x) ((void *)((char *)(x) - WIDTH)) -#define ELEM(a,i) ((char *)(a) + (i) * WIDTH) -#define LEN(n) ((n) * WIDTH) - -#ifndef MIN -#define MIN(a,b) ((a) <= (b) ? (a) : (b)) -#endif -#define SUCCESS 0 -#define FAILURE (-1) - -#define CONCAT(x, y) x ## _ ## y -#define MAKE_STR(x, y) CONCAT(x,y) -#define NAME(x) MAKE_STR(x, WIDTH) -#define CALL(x) NAME(x) - -typedef int (*comparator) (const void *x, const void *y, void *opaque); - -struct timsort_run { - void *base; - size_t len; -}; - -struct timsort { - /** - * The array being sorted. - */ - void *a; - size_t a_length; - - /** - * The comparator for this sort. - */ - int (*c) (const void *x, const void *y, void *opaque); - - void *opaque; - - /** - * This controls when we get *into* galloping mode. It is initialized - * to MIN_GALLOP. The mergeLo and mergeHi methods nudge it higher for - * random data, and lower for highly structured data. - */ - size_t minGallop; - - /** - * Temp storage for merges. - */ - void *tmp; - size_t tmp_length; - - /** - * A stack of pending runs yet to be merged. Run i starts at - * address base[i] and extends for len[i] elements. It's always - * true (so long as the indices are in bounds) that: - * - * runBase[i] + runLen[i] == runBase[i + 1] - * - * so we could cut the storage for this, but it's a minor amount, - * and keeping all the info explicit simplifies the code. - */ - size_t stackSize; // Number of pending runs on stack - size_t stackLen; // maximum stack size -#ifdef MALLOC_STACK - struct timsort_run *run; -#else - struct timsort_run run[MAX_STACK]; -#endif -}; - -static int timsort_init(struct timsort *ts, void *a, size_t len, - int (*c) (const void *, const void *, void *opaque), - void *opaque, - size_t width); -static void timsort_deinit(struct timsort *ts); -static size_t minRunLength(size_t n); -static void pushRun(struct timsort *ts, void *runBase, size_t runLen); -static void *ensureCapacity(struct timsort *ts, size_t minCapacity, - size_t width); - -/** - * Creates a TimSort instance to maintain the state of an ongoing sort. - * - * @param a the array to be sorted - * @param nel the length of the array - * @param c the comparator to determine the order of the sort - * @param width the element width - */ -static int timsort_init(struct timsort *ts, void *a, size_t len, - int (*c) (const void *, const void *, void *opaque), - void *opaque, - size_t width) { - assert(ts); - assert(a || !len); - assert(c); - - ts->minGallop = MIN_GALLOP; - ts->stackSize = 0; - - ts->a = a; - ts->a_length = len; - ts->c = c; - ts->opaque = opaque; - - // Allocate temp storage (which may be increased later if necessary) - ts->tmp_length = (len < 2 * INITIAL_TMP_STORAGE_LENGTH ? - len >> 1 : INITIAL_TMP_STORAGE_LENGTH); - ts->tmp = malloc(ts->tmp_length * width); - - /* - * Allocate runs-to-be-merged stack (which cannot be expanded). The - * stack length requirements are described in listsort.txt. The C - * version always uses the same stack length (85), but this was - * measured to be too expensive when sorting "mid-sized" arrays (e.g., - * 100 elements) in Java. Therefore, we use smaller (but sufficiently - * large) stack lengths for smaller arrays. The "magic numbers" in the - * computation below must be changed if MIN_MERGE is decreased. See - * the MIN_MERGE declaration above for more information. - */ - - /* POP: - * In listsort.txt, Tim argues that the run lengths form a decreasing - * sequence, and each run length is greater than the previous two. - * Thus, lower bounds on the minimum runLen numbers on the stack are: - * - * [ 1 = b[1] - * , minRun = b[2] - * , 1 * minRun + 2 = b[3] - * , 2 * minRun + 3 = b[4] - * , 3 * minRun + 6 = b[5] - * , ... - * ], - * - * Moreover, minRun >= MIN_MERGE / 2. Also, note that the sum of the - * run lenghts is less than or equal to the length of the array. - * - * Let s be the stack length and n be the array length. If s >= 2, then n >= b[1] + b[2]. - * More generally, if s >= m, then n >= b[1] + b[2] + ... + b[m] = B[m]. Conversely, if - * n < B[m], then s < m. - * - * In Haskell, we can compute the bin sizes using the fibonacci numbers - * - * fibs = 1:1:(zipWith (+) fibs (tail fibs)) - * - * cumSums a = case a of { [] -> [] ; (x:xs) -> x:(map (x+) (cumSums xs)) } - * - * fibSums = cumSums fibs - * - * binSizes minRun = ([ 1, minRun, minRun + 2 ] - * ++ [ (1 + minRun) * (fibs !! (i+2)) - * + fibSums !! (i+1) - fibs !! i | i <- [0..] ]) - * - * arraySizes minRun = cumSums (binSizes minRun) - * - * We these funcitons, we can compute a table with minRun = MIN_MERGE / 2 = 16: - * - * m B[m] - * --------------------------- - * 1 17 - * 2 35 - * 3 70 - * 4 124 - * 5 214 - * 6 359 - * 11 4220 - * 17 76210 # > 2^16 - 1 - * 40 4885703256 # > 2^32 - 1 - * 86 20061275507500957239 # > 2^64 - 1 - * - * If len < B[m], then stackLen < m: - */ -#ifdef MALLOC_STACK - ts->stackLen = (len < 359 ? 5 - : len < 4220 ? 10 - : len < 76210 ? 16 : len < 4885703256ULL ? 39 : 85); - - /* Note that this is slightly more liberal than in the Java - * implementation. The discrepancy might be because the Java - * implementation uses a less accurate lower bound. - */ - //stackLen = (len < 120 ? 5 : len < 1542 ? 10 : len < 119151 ? 19 : 40); - - ts->run = malloc(ts->stackLen * sizeof (ts->run[0])); -#else - ts->stackLen = MAX_STACK; -#endif - - if (ts->tmp && ts->run) { - return SUCCESS; - } else { - timsort_deinit(ts); - return FAILURE; - } -} - -static void timsort_deinit(struct timsort *ts) { - free(ts->tmp); -#ifdef MALLOC_STACK - free(ts->run); -#endif -} - -/** - * Returns the minimum acceptable run length for an array of the specified - * length. Natural runs shorter than this will be extended with - * {@link #binarySort}. - * - * Roughly speaking, the computation is: - * - * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MIN_MERGE/2. - * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k - * is close to, but strictly less than, an exact power of 2. - * - * For the rationale, see listsort.txt. - * - * @param n the length of the array to be sorted - * @return the length of the minimum run to be merged - */ -static size_t minRunLength(size_t n) { - size_t r = 0; // Becomes 1 if any 1 bits are shifted off - while (n >= MIN_MERGE) { - r |= (n & 1); - n >>= 1; - } - return n + r; -} - -/** - * Pushes the specified run onto the pending-run stack. - * - * @param runBase index of the first element in the run - * @param runLen the number of elements in the run - */ -static void pushRun(struct timsort *ts, void *runBase, size_t runLen) { - assert(ts->stackSize < ts->stackLen); - - ts->run[ts->stackSize++] = (struct timsort_run){ - runBase, runLen - }; -} - -/** - * Ensures that the external array tmp has at least the specified - * number of elements, increasing its size if necessary. The size - * increases exponentially to ensure amortized linear time complexity. - * - * @param minCapacity the minimum required capacity of the tmp array - * @return tmp, whether or not it grew - */ -static void *ensureCapacity(struct timsort *ts, size_t minCapacity, - size_t width) { - if (ts->tmp_length < minCapacity) { - // Compute smallest power of 2 > minCapacity - size_t newSize = minCapacity; - newSize |= newSize >> 1; - newSize |= newSize >> 2; - newSize |= newSize >> 4; - newSize |= newSize >> 8; - newSize |= newSize >> 16; - if (sizeof (newSize) > 4) - newSize |= newSize >> 32; - - newSize++; - newSize = MIN(newSize, ts->a_length >> 1); - if (newSize == 0) { // (overflow) Not bloody likely! - newSize = minCapacity; - } - - free(ts->tmp); - ts->tmp_length = newSize; - ts->tmp = malloc(ts->tmp_length * width); - } - - return ts->tmp; -} - -#define WIDTH 4 -#include "timsort-impl.h" -#undef WIDTH - -#define WIDTH 8 -#include "timsort-impl.h" -#undef WIDTH - -#define WIDTH 16 -#include "timsort-impl.h" -#undef WIDTH - -#define WIDTH width -#include "timsort-impl.h" -#undef WIDTH - -/** - * @param a the array to be sorted - * @param nel the length of the array - * @param c the comparator to determine the order of the sort - * @param width the element width - * @param opaque data for the comparator function - * @param opaque data for the comparator function - */ -int ejdbtimsort(void *a, size_t nel, size_t width, - int (*c) (const void*, const void*, void*), void *opaque) { - switch (width) { - case 4: - return timsort_4(a, nel, width, c, opaque); - case 8: - return timsort_8(a, nel, width, c, opaque); - case 16: - return timsort_16(a, nel, width, c, opaque); - default: - return timsort_width(a, nel, width, c, opaque); - } -} - -typedef struct { - int (*cmp)(const TCLISTDATUM*, const TCLISTDATUM*, void *opaque); - void *tcopaque; -} tclistdata; - -static inline int tclistcmp(const void* a, const void* b, void* o) { - tclistdata* op = o; - assert(op && op->cmp); - return op->cmp(a, b, op->tcopaque); -} - -int ejdbtimsortlist(TCLIST *list, - int (*compar) (const TCLISTDATUM*, const TCLISTDATUM*, void *opaque), void *opaque) { - tclistdata op; - op.cmp = compar; - op.tcopaque = opaque; - return ejdbtimsort(list->array + list->start, list->num, sizeof (TCLISTDATUM), tclistcmp, &op); -}