diff --git a/Zend/zend_sort.c b/Zend/zend_sort.c index 4fe3835edb0bb..7cbcb988146ee 100644 --- a/Zend/zend_sort.c +++ b/Zend/zend_sort.c @@ -312,7 +312,7 @@ ZEND_API void zend_insert_sort(void *base, size_t nmemb, size_t siz, compare_fun * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -ZEND_API void zend_sort(void *base, size_t nmemb, size_t siz, compare_func_t cmp, swap_func_t swp) +ZEND_API void zend_sort_unstable(void *base, size_t nmemb, size_t siz, compare_func_t cmp, swap_func_t swp) { while (1) { if (nmemb <= 16) { @@ -372,3 +372,53 @@ ZEND_API void zend_sort(void *base, size_t nmemb, size_t siz, compare_func_t cmp } } /* }}} */ + +/** + * A stable, adaptive, iterative mergesort that requires far fewer than + * n lg(n) comparisons when running on partially sorted arrays, while + * offering performance comparable to a traditional mergesort when run + * on random arrays. Like all proper mergesorts, this sort is stable and + * runs O(n log n) time (worst case). In the worst case, this sort requires + * temporary storage space for n/2 object references; in the best case, + * it requires only a small constant amount of space. + * + * This implementation was adapted from Josh Bloch's Java implementation of + * Tim Peters's list sort for Python, which is described in detail here: + * + * http://svn.python.org/projects/python/trunk/Objects/listsort.txt + * + * Tim's C code may be found here: + * + * http://svn.python.org/projects/python/trunk/Objects/listobject.c + * + * Josh's (Apache 2.0 Licenced) Java code may be found here: + * + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/main/java/util/TimSort.java?view=co + * + * The underlying techniques are described in this paper (and may have + * even earlier origins): + * + * "Optimistic Sorting and Information Theoretic Complexity" + * Peter McIlroy + * SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms), + * pp 467-474, Austin, Texas, 25-27 January 1993. + * + * While the API to this class consists solely of static methods, it is + * (privately) instantiable; a TimSort instance holds the state of an ongoing + * sort, assuming the input array is large enough to warrant the full-blown + * TimSort. Small arrays are sorted in place, using a binary insertion sort. + * + * @author Josh Bloch + * @author Patrick O. Perry + */ +int zend_timsort( + void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)); + +int zend_timsort_r( + void *base, size_t nel, size_t width, int (*compar) (const void *, const void *, void *), + void *context); + +ZEND_API void zend_sort(void *base, size_t nmemb, size_t siz, compare_func_t cmp, swap_func_t swp) +{ + zend_timsort(base, nmemb, siz, cmp); +} diff --git a/Zend/zend_timsort.c b/Zend/zend_timsort.c new file mode 100644 index 0000000000000..6aaed2c0ee0ba --- /dev/null +++ b/Zend/zend_timsort.c @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2011 Patrick O. Perry + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // assert +#include // EINVAL +#if defined(_MSC_VER) +# include // _alloca +#endif +#include // size_t, NULL +#include // malloc, free +#include // memcpy, memmove + +/** + * This is the minimum sized sequence that will be merged. Shorter + * sequences will be lengthened by calling binarySort. If the entire + * array is less than this length, no merges will be performed. + * + * This constant should be a power of two. It was 64 in Tim Peter's C + * implementation, but 32 was empirically determined to work better in + * [Android's Java] implementation. In the unlikely event that you set + * this constant to be a number that's not a power of two, you'll need + * to change the {@link #minRunLength} computation. + * + * If you decrease this constant, you must change the stackLen + * computation in the TimSort constructor, or you risk an + * ArrayOutOfBounds exception. See listsort.txt for a discussion + * of the minimum stack length required as a function of the length + * of the array being sorted and the minimum merge sequence length. + */ +#define MIN_MERGE 32 + +/** + * When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. + */ +#define MIN_GALLOP 7 + +/** + * Maximum initial size of tmp array, which is used for merging. The array + * can grow to accommodate demand. + * + * Unlike Tim's original C version, we do not allocate this much storage + * when sorting smaller arrays. This change was required for performance. + */ +#define INITIAL_TMP_STORAGE_LENGTH 256 + +/** + * Maximum stack size. This depends on MIN_MERGE and sizeof(size_t). + */ +#define MAX_STACK 85 + +/** + * Define MALLOC_STACK if you want to allocate the run stack on the heap. + * Otherwise, 2* MAX_STACK * sizeof(size_t) ~ 1.3K gets reserved on the + * call stack. + */ +/* #undef MALLOC_STACK */ + +#if defined(_MSC_VER) +# define DEFINE_TEMP(temp) void *temp = _alloca(WIDTH) +#else +# define DEFINE_TEMP(temp) char temp[WIDTH] +#endif + +#define ASSIGN(x, y) memcpy(x, y, WIDTH) +#define INCPTR(x) ((void *)((char *)(x) + WIDTH)) +#define DECPTR(x) ((void *)((char *)(x) - WIDTH)) +#define ELEM(a,i) ((char *)(a) + (i) * WIDTH) +#define LEN(n) ((n) * WIDTH) + +#define MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define SUCCESS 0 +#define FAILURE (-1) + +#define CONCAT(x, y) x ## _ ## y +#define MAKE_STR(x, y) CONCAT(x,y) +#define NAME(x) MAKE_STR(x, WIDTH) +#define CALL(x) NAME(x) + + +#ifdef IS_TIMSORT_R +/* + * Note order of elements to comparator matches that of C11 qsort_s, + * not BSD qsort_r or Windows qsort_s + */ +typedef int (*comparator) (const void *x, const void *y, void *thunk); +#define CMPPARAMS(compar, thunk) comparator compar, void *thunk +#define CMPARGS(compar, thunk) (compar), (thunk) +#define CMP(compar, thunk, x, y) (compar((x), (y), (thunk))) +#define TIMSORT zend_timsort_r + +#else + +typedef int (*comparator) (const void *x, const void *y); +#define CMPPARAMS(compar, thunk) comparator compar +#define CMPARGS(compar, thunk) (compar) +#define CMP(compar, thunk, x, y) (compar((x), (y))) +#define TIMSORT zend_timsort + +#endif /* IS_TIMSORT_R */ + +struct timsort_run { + void *base; + size_t len; +}; + +struct timsort { + /** + * The array being sorted. + */ + void *a; + size_t a_length; + + /** + * The comparator for this sort. + */ + comparator c; +#ifdef IS_TIMSORT_R + void *carg; +#endif + + /** + * This controls when we get *into* galloping mode. It is initialized + * to MIN_GALLOP. The mergeLo and mergeHi methods nudge it higher for + * random data, and lower for highly structured data. + */ + size_t minGallop; + + /** + * Temp storage for merges. + */ + void *tmp; + size_t tmp_length; + + /** + * A stack of pending runs yet to be merged. Run i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that: + * + * runBase[i] + runLen[i] == runBase[i + 1] + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + size_t stackSize; // Number of pending runs on stack + size_t stackLen; // maximum stack size +#ifdef MALLOC_STACK + struct timsort_run *run; +#else + struct timsort_run run[MAX_STACK]; +#endif +}; + +static int timsort_init(struct timsort *ts, void *a, size_t len, + CMPPARAMS(c, carg), + size_t width); +static void timsort_deinit(struct timsort *ts); +static size_t minRunLength(size_t n); +static void pushRun(struct timsort *ts, void *runBase, size_t runLen); +static void *ensureCapacity(struct timsort *ts, size_t minCapacity, + size_t width); + +/** + * Creates a TimSort instance to maintain the state of an ongoing sort. + * + * @param a the array to be sorted + * @param nel the length of the array + * @param c the comparator to determine the order of the sort + * @param width the element width + */ +static int timsort_init(struct timsort *ts, void *a, size_t len, + CMPPARAMS(c, carg), + size_t width) +{ + int err = 0; + + assert(ts); + assert(a || !len); + assert(c); + assert(width); + + ts->minGallop = MIN_GALLOP; + ts->stackSize = 0; + + ts->a = a; + ts->a_length = len; + ts->c = c; +#ifdef IS_TIMSORT_R + ts->carg = carg; +#endif + + // Allocate temp storage (which may be increased later if necessary) + ts->tmp_length = (len < 2 * INITIAL_TMP_STORAGE_LENGTH ? + len >> 1 : INITIAL_TMP_STORAGE_LENGTH); + if (ts->tmp_length) { + ts->tmp = malloc(ts->tmp_length * width); + err |= ts->tmp == NULL; + } else { + ts->tmp = NULL; + } + + /* + * Allocate runs-to-be-merged stack (which cannot be expanded). The + * stack length requirements are described in listsort.txt. The C + * version always uses the same stack length (85), but this was + * measured to be too expensive when sorting "mid-sized" arrays (e.g., + * 100 elements) in Java. Therefore, we use smaller (but sufficiently + * large) stack lengths for smaller arrays. The "magic numbers" in the + * computation below must be changed if MIN_MERGE is decreased. See + * the MIN_MERGE declaration above for more information. + */ + + /* POP: + * In listsort.txt, Tim argues that the run lengths form a decreasing + * sequence, and each run length is greater than the previous two. + * Thus, lower bounds on the minimum runLen numbers on the stack are: + * + * [ 1 = b[1] + * , minRun = b[2] + * , 1 * minRun + 2 = b[3] + * , 2 * minRun + 3 = b[4] + * , 3 * minRun + 6 = b[5] + * , ... + * ], + * + * Moreover, minRun >= MIN_MERGE / 2. Also, note that the sum of the + * run lenghts is less than or equal to the length of the array. + * + * Let s be the stack length and n be the array length. If s >= 2, then n >= b[1] + b[2]. + * More generally, if s >= m, then n >= b[1] + b[2] + ... + b[m] = B[m]. Conversely, if + * n < B[m], then s < m. + * + * In Haskell, we can compute the bin sizes using the fibonacci numbers + * + * fibs = 1:1:(zipWith (+) fibs (tail fibs)) + * + * cumSums a = case a of { [] -> [] ; (x:xs) -> x:(map (x+) (cumSums xs)) } + * + * fibSums = cumSums fibs + * + * binSizes minRun = ([ 1, minRun, minRun + 2 ] + * ++ [ (1 + minRun) * (fibs !! (i+2)) + * + fibSums !! (i+1) - fibs !! i | i <- [0..] ]) + * + * arraySizes minRun = cumSums (binSizes minRun) + * + * We these funcitons, we can compute a table with minRun = MIN_MERGE / 2 = 16: + * + * m B[m] + * --------------------------- + * 1 17 + * 2 35 + * 3 70 + * 4 124 + * 5 214 + * 6 359 + * 11 4220 + * 17 76210 # > 2^16 - 1 + * 40 4885703256 # > 2^32 - 1 + * 86 20061275507500957239 # > 2^64 - 1 + * + * If len < B[m], then stackLen < m: + */ +#ifdef MALLOC_STACK + ts->stackLen = (len < 359 ? 5 + : len < 4220 ? 10 + : len < 76210 ? 16 : len < 4885703256ULL ? 39 : 85); + + /* Note that this is slightly more liberal than in the Java + * implementation. The discrepancy might be because the Java + * implementation uses a less accurate lower bound. + */ + //stackLen = (len < 120 ? 5 : len < 1542 ? 10 : len < 119151 ? 19 : 40); + + ts->run = malloc(ts->stackLen * sizeof(ts->run[0])); + err |= ts->run == NULL; +#else + ts->stackLen = MAX_STACK; +#endif + + if (!err) { + return SUCCESS; + } else { + timsort_deinit(ts); + return FAILURE; + } +} + +static void timsort_deinit(struct timsort *ts) +{ + free(ts->tmp); +#ifdef MALLOC_STACK + free(ts->run); +#endif +} + +/** + * Returns the minimum acceptable run length for an array of the specified + * length. Natural runs shorter than this will be extended with + * {@link #binarySort}. + * + * Roughly speaking, the computation is: + * + * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return MIN_MERGE/2. + * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k + * is close to, but strictly less than, an exact power of 2. + * + * For the rationale, see listsort.txt. + * + * @param n the length of the array to be sorted + * @return the length of the minimum run to be merged + */ +static size_t minRunLength(size_t n) +{ + size_t r = 0; // Becomes 1 if any 1 bits are shifted off + while (n >= MIN_MERGE) { + r |= (n & 1); + n >>= 1; + } + return n + r; +} + +/** + * Pushes the specified run onto the pending-run stack. + * + * @param runBase index of the first element in the run + * @param runLen the number of elements in the run + */ +static void pushRun(struct timsort *ts, void *runBase, size_t runLen) +{ + assert(ts->stackSize < ts->stackLen); + + ts->run[ts->stackSize].base = runBase; + ts->run[ts->stackSize].len = runLen; + ts->stackSize++; +} + +/** + * Ensures that the external array tmp has at least the specified + * number of elements, increasing its size if necessary. The size + * increases exponentially to ensure amortized linear time complexity. + * + * @param minCapacity the minimum required capacity of the tmp array + * @return tmp, whether or not it grew + */ +static void *ensureCapacity(struct timsort *ts, size_t minCapacity, + size_t width) +{ + if (ts->tmp_length < minCapacity) { + // Compute smallest power of 2 > minCapacity + size_t newSize = minCapacity; + newSize |= newSize >> 1; + newSize |= newSize >> 2; + newSize |= newSize >> 4; + newSize |= newSize >> 8; + newSize |= newSize >> 16; + if (sizeof(newSize) > 4) + newSize |= newSize >> 32; + + newSize++; + newSize = MIN(newSize, ts->a_length >> 1); + if (newSize == 0) { // (overflow) Not bloody likely! + newSize = minCapacity; + } + + free(ts->tmp); + ts->tmp_length = newSize; + ts->tmp = malloc(ts->tmp_length * width); + } + + return ts->tmp; +} + +/*#define WIDTH 4 +#include "zend_timsort_impl.h" +#undef WIDTH + +#define WIDTH 8 +#include "zend_timsort_impl.h" +#undef WIDTH*/ + +#define WIDTH 32 /* sizeof(Bucket) */ +#include "zend_timsort_impl.h" +#undef WIDTH + +#define WIDTH width +#include "zend_timsort_impl.h" +#undef WIDTH + + +int TIMSORT(void *a, size_t nel, size_t width, CMPPARAMS(c, carg)) +{ + switch (width) { + /*case 4: + return timsort_4(a, nel, width, CMPARGS(c, carg)); + case 8: + return timsort_8(a, nel, width, CMPARGS(c, carg));*/ + case 32: + return timsort_32(a, nel, width, CMPARGS(c, carg)); + default: + return timsort_width(a, nel, width, CMPARGS(c, carg)); + } +} diff --git a/Zend/zend_timsort_impl.h b/Zend/zend_timsort_impl.h new file mode 100644 index 0000000000000..55e9ce8a53f30 --- /dev/null +++ b/Zend/zend_timsort_impl.h @@ -0,0 +1,837 @@ +/* + * Copyright (C) 2011 Patrick O. Perry + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +static void NAME(binarySort) (void *a, size_t hi, size_t start, + CMPPARAMS(compare, carg), size_t width); +static size_t NAME(countRunAndMakeAscending) (void *a, size_t hi, + CMPPARAMS(compare, carg), + size_t width); +static void NAME(reverseRange) (void *a, size_t hi, size_t width); +static int NAME(mergeCollapse) (struct timsort * ts, size_t width); +static int NAME(mergeForceCollapse) (struct timsort * ts, size_t width); +static int NAME(mergeAt) (struct timsort * ts, size_t i, size_t width); +static size_t NAME(gallopLeft) (void *key, void *base, size_t len, + size_t hint, CMPPARAMS(compare, carg), + size_t width); +static size_t NAME(gallopRight) (void *key, void *base, size_t len, + size_t hint, CMPPARAMS(compare, carg), + size_t width); +static int NAME(mergeLo) (struct timsort * ts, void *base1, size_t len1, + void *base2, size_t len2, size_t width); +static int NAME(mergeHi) (struct timsort * ts, void *base1, size_t len1, + void *base2, size_t len2, size_t width); + +static int NAME(timsort) (void *a, size_t nel, size_t width, CMPPARAMS(c, carg)) +{ + int err = SUCCESS; + struct timsort ts; + size_t minRun; + + assert(a || !nel || !width); + assert(c); + + if (nel < 2 || !width) + return err; // Arrays of size 0 and 1 are always sorted + + // If array is small, do a "mini-TimSort" with no merges + if (nel < MIN_MERGE) { + size_t initRunLen = + CALL(countRunAndMakeAscending) (a, nel, CMPARGS(c, carg), width); + CALL(binarySort) (a, nel, initRunLen, CMPARGS(c, carg), width); + return err; + } + + /** + * March over the array once, left to right, finding natural runs, + * extending short natural runs to minRun elements, and merging runs + * to maintain stack invariant. + */ + if ((err = timsort_init(&ts, a, nel, CMPARGS(c, carg), width))) + return err; + + minRun = minRunLength(nel); + do { + // Identify next run + size_t runLen = + CALL(countRunAndMakeAscending) (a, nel, CMPARGS(c, carg), width); + + // If run is short, extend to min(minRun, nel) + if (runLen < minRun) { + size_t force = nel <= minRun ? nel : minRun; + CALL(binarySort) (a, force, runLen, CMPARGS(c, carg), width); + runLen = force; + } + // Push run onto pending-run stack, and maybe merge + pushRun(&ts, a, runLen); + if ((err = CALL(mergeCollapse) (&ts, width))) + goto out; + + // Advance to find next run + a = ELEM(a, runLen); + nel -= runLen; + } while (nel != 0); + + // Merge all remaining runs to complete sort + if ((err = CALL(mergeForceCollapse) (&ts, width))) + goto out; + + assert(ts.stackSize == 1); +out: + timsort_deinit(&ts); + return err; +} + +/** + * Sorts the specified portion of the specified array using a binary + * insertion sort. This is the best method for sorting small numbers + * of elements. It requires O(n log n) compares, but O(n^2) data + * movement (worst case). + * + * If the initial part of the specified range is already sorted, + * this method can take advantage of it: the method assumes that the + * elements from index {@code lo}, inclusive, to {@code start}, + * exclusive are already sorted. + * + * @param a the array in which a range is to be sorted + * @param hi the index after the last element in the range to be sorted + * @param start the index of the first element in the range that is + * not already known to be sorted ({@code lo <= start <= hi}) + * @param c comparator to used for the sort + */ +static void NAME(binarySort) (void *a, size_t hi, size_t start, + CMPPARAMS(compare, carg), size_t width) { + DEFINE_TEMP(pivot); + char *startp; + + assert(start <= hi); + + if (start == 0) + start++; + + startp = ELEM(a, start); + + for (; start < hi; start++, startp = INCPTR(startp)) { + + // Set left (and right) to the index where a[start] (pivot) belongs + char *leftp = a; + size_t right = start; + size_t n; + + /* + * Invariants: + * pivot >= all in [0, left). + * pivot < all in [right, start). + */ + while (0 < right) { + size_t mid = right >> 1; + void *midp = ELEM(leftp, mid); + if (CMP(compare, carg, startp, midp) < 0) { + right = mid; + } else { + leftp = INCPTR(midp); + right -= (mid + 1); + } + } + assert(0 == right); + + /* + * The invariants still hold: pivot >= all in [lo, left) and + * pivot < all in [left, start), so pivot belongs at left. Note + * that if there are elements equal to pivot, left points to the + * first slot after them -- that's why this sort is stable. + * Slide elements over to make room to make room for pivot. + */ + n = startp - leftp; // The number of bytes to move + + ASSIGN(pivot, startp); + memmove(INCPTR(leftp), leftp, n); // POP: overlaps + + // a[left] = pivot; + ASSIGN(leftp, pivot); + } + (void)width; +} + +/** + * Returns the length of the run beginning at the specified position in + * the specified array and reverses the run if it is descending (ensuring + * that the run will always be ascending when the method returns). + * + * A run is the longest ascending sequence with: + * + * a[0] <= a[1] <= a[2] <= ... + * + * or the longest descending sequence with: + * + * a[0] > a[1] > a[2] > ... + * + * For its intended use in a stable mergesort, the strictness of the + * definition of "descending" is needed so that the call can safely + * reverse a descending sequence without violating stability. + * + * @param a the array in which a run is to be counted and possibly reversed + * @param hi index after the last element that may be contained in the run. + * It is required that {@code 0 < hi}. + * @param compare the comparator to used for the sort + * @return the length of the run beginning at the specified position in + * the specified array + */ +static size_t NAME(countRunAndMakeAscending) (void *a, size_t hi, + CMPPARAMS(compare, carg), size_t width) +{ + size_t runHi = 1; + char *cur; + char *next; + + assert(0 < hi); + if (runHi == hi) + return 1; + + cur = INCPTR(a); + next = INCPTR(cur); + runHi++; + + // Find end of run, and reverse range if descending + if (CMP(compare, carg, cur, a) < 0) { // Descending + while (runHi < hi && CMP(compare, carg, next, cur) < 0) { + runHi++; + cur = next; + next = INCPTR(next); + } + CALL(reverseRange) (a, runHi, width); + } else { // Ascending + while (runHi < hi && CMP(compare, carg, next, cur) >= 0) { + runHi++; + cur = next; + next = INCPTR(next); + } + } + + (void)width; + return runHi; +} + +/** + * Reverse the specified range of the specified array. + * + * @param a the array in which a range is to be reversed + * @param hi the index after the last element in the range to be reversed + */ +static void NAME(reverseRange) (void *a, size_t hi, size_t width) { + DEFINE_TEMP(t); + char *front = a; + char *back = ELEM(a, hi - 1); + + assert(hi > 0); + + while (front < back) { + ASSIGN(t, front); + ASSIGN(front, back); + ASSIGN(back, t); + front = INCPTR(front); + back = DECPTR(back); + } + (void)width; +} + +/** + * Examines the stack of runs waiting to be merged and merges adjacent runs + * until the stack invariants are reestablished: + * + * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] + * 2. runLen[i - 2] > runLen[i - 1] + * + * This method is called each time a new run is pushed onto the stack, + * so the invariants are guaranteed to hold for i < stackSize upon + * entry to the method. + * + * POP: + * Modified according to http://envisage-project.eu/wp-content/uploads/2015/02/sorting.pdf + * + * and + * + * https://bugs.openjdk.java.net/browse/JDK-8072909 (suggestion 2) + * + */ +static int NAME(mergeCollapse) (struct timsort * ts, size_t width) { + int err = SUCCESS; + + while (ts->stackSize > 1) { + size_t n = ts->stackSize - 2; + struct timsort_run *run = ts->run; + + if ((n > 0 && run[n-1].len <= run[n].len + run[n+1].len) + || (n > 1 && run[n-2].len <= run[n].len + run[n-1].len)) { + if (run[n - 1].len < run[n + 1].len) + n--; + } else if (run[n].len > run[n + 1].len) { + break; /* Invariant is established */ + } + err = CALL(mergeAt) (ts, n, width); + if (err) + break; + } + return err; +} + +/** + * Merges all runs on the stack until only one remains. This method is + * called once, to complete the sort. + */ +static int NAME(mergeForceCollapse) (struct timsort * ts, size_t width) { + int err = SUCCESS; + + while (ts->stackSize > 1) { + size_t n = ts->stackSize - 2; + if (n > 0 && ts->run[n - 1].len < ts->run[n + 1].len) + n--; + err = CALL(mergeAt) (ts, n, width); + if (err) + break; + } + return err; +} + +/** + * Merges the two runs at stack indices i and i+1. Run i must be + * the penultimate or antepenultimate run on the stack. In other words, + * i must be equal to stackSize-2 or stackSize-3. + * + * @param i stack index of the first of the two runs to merge + */ +static int NAME(mergeAt) (struct timsort * ts, size_t i, size_t width) { + void *base1 = ts->run[i].base; + size_t len1 = ts->run[i].len; + void *base2 = ts->run[i + 1].base; + size_t len2 = ts->run[i + 1].len; + size_t k; + + assert(ts->stackSize >= 2); + assert(i == ts->stackSize - 2 || i == ts->stackSize - 3); + assert(len1 > 0 && len2 > 0); + assert(ELEM(base1, len1) == base2); + + /* + * Record the length of the combined runs; if i is the 3rd-last + * run now, also slide over the last run (which isn't involved + * in this merge). The current run (i+1) goes away in any case. + */ + ts->run[i].len = len1 + len2; + if (i == ts->stackSize - 3) { + ts->run[i + 1] = ts->run[i + 2]; + } + ts->stackSize--; + + /* + * Find where the first element of run2 goes in run1. Prior elements + * in run1 can be ignored (because they're already in place). + */ + k = CALL(gallopRight) (base2, base1, len1, 0, CMPARGS(ts->c, ts->carg), width); + base1 = ELEM(base1, k); + len1 -= k; + if (len1 == 0) + return SUCCESS; + + /* + * Find where the last element of run1 goes in run2. Subsequent elements + * in run2 can be ignored (because they're already in place). + */ + len2 = + CALL(gallopLeft) (ELEM(base1, len1 - 1), base2, len2, len2 - 1, + CMPARGS(ts->c, ts->carg), width); + if (len2 == 0) + return SUCCESS; + + // Merge remaining runs, using tmp array with min(len1, len2) elements + if (len1 <= len2) + return CALL(mergeLo) (ts, base1, len1, base2, len2, width); + else + return CALL(mergeHi) (ts, base1, len1, base2, len2, width); +} + +/** + * Locates the position at which to insert the specified key into the + * specified sorted range; if the range contains an element equal to key, + * returns the index of the leftmost equal element. + * + * @param key the key whose insertion point to search for + * @param base the array in which to search + * @param len the length of the range; must be > 0 + * @param hint the index at which to begin the search, 0 <= hint < n. + * The closer hint is to the result, the faster this method will run. + * @param c the comparator used to order the range, and to search + * @return the int k, 0 <= k <= n such that a[b + k - 1] < key <= a[b + k], + * pretending that a[b - 1] is minus infinity and a[b + n] is infinity. + * In other words, key belongs at index b + k; or in other words, + * the first k elements of a should precede key, and the last n - k + * should follow it. + */ +static size_t NAME(gallopLeft) (void *key, void *base, size_t len, + size_t hint, CMPPARAMS(compare, carg), + size_t width) { + char *hintp = ELEM(base, hint); + size_t lastOfs = 0; + size_t ofs = 1; + + assert(len > 0 && hint < len); + if (CMP(compare, carg, key, hintp) > 0) { + // Gallop right until a[hint+lastOfs] < key <= a[hint+ofs] + size_t maxOfs = len - hint; + while (ofs < maxOfs + && CMP(compare, carg, key, ELEM(hintp, ofs)) > 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; // eventually this becomes SIZE_MAX + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + lastOfs += hint + 1; // POP: we add 1 here so lastOfs stays non-negative + ofs += hint; + } else { // key <= a[hint] + // Gallop left until a[hint-ofs] < key <= a[hint-lastOfs] + const size_t maxOfs = hint + 1; + size_t tmp; + while (ofs < maxOfs + && CMP(compare, carg, key, ELEM(hintp, -ofs)) <= 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; // no need to check for overflow + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + tmp = lastOfs; + lastOfs = hint + 1 - ofs; // POP: we add 1 here so lastOfs stays non-negative + ofs = hint - tmp; + } + assert(lastOfs <= ofs && ofs <= len); + + /* + * Now a[lastOfs-1] < key <= a[ofs], so key belongs somewhere + * to the right of lastOfs but no farther right than ofs. Do a binary + * search, with invariant a[lastOfs - 1] < key <= a[ofs]. + */ + // lastOfs++; POP: we added 1 above to keep lastOfs non-negative + while (lastOfs < ofs) { + //size_t m = lastOfs + ((ofs - lastOfs) >> 1); + // http://stackoverflow.com/questions/4844165/safe-integer-middle-value-formula + size_t m = (lastOfs & ofs) + ((lastOfs ^ ofs) >> 1); + + if (CMP(compare, carg, key, ELEM(base, m)) > 0) + lastOfs = m + 1; // a[m] < key + else + ofs = m; // key <= a[m] + } + assert(lastOfs == ofs); // so a[ofs - 1] < key <= a[ofs] + (void)width; + return ofs; +} + +/** + * Like gallopLeft, except that if the range contains an element equal to + * key, gallopRight returns the index after the rightmost equal element. + * + * @param key the key whose insertion point to search for + * @param base the array in which to search + * @param len the length of the range; must be > 0 + * @param hint the index at which to begin the search, 0 <= hint < n. + * The closer hint is to the result, the faster this method will run. + * @param c the comparator used to order the range, and to search + * @return the int k, 0 <= k <= n such that a[b + k - 1] <= key < a[b + k] + */ +static size_t NAME(gallopRight) (void *key, void *base, size_t len, + size_t hint, CMPPARAMS(compare, carg), + size_t width) { + char *hintp = ELEM(base, hint); + size_t ofs = 1; + size_t lastOfs = 0; + + assert(len > 0 && hint < len); + + if (CMP(compare, carg, key, hintp) < 0) { + // Gallop left until a[hint - ofs] <= key < a[hint - lastOfs] + size_t maxOfs = hint + 1; + size_t tmp; + while (ofs < maxOfs + && CMP(compare, carg, key, ELEM(hintp, -ofs)) < 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; // no need to check for overflow + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + tmp = lastOfs; + lastOfs = hint + 1 - ofs; + ofs = hint - tmp; + } else { // a[hint] <= key + // Gallop right until a[hint + lastOfs] <= key < a[hint + ofs] + size_t maxOfs = len - hint; + while (ofs < maxOfs + && CMP(compare, carg, key, ELEM(hintp, ofs)) >= 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; // no need to check for overflow + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + lastOfs += hint + 1; + ofs += hint; + } + assert(lastOfs <= ofs && ofs <= len); + + /* + * Now a[lastOfs - 1] <= key < a[ofs], so key belongs somewhere to + * the right of lastOfs but no farther right than ofs. Do a binary + * search, with invariant a[lastOfs - 1] <= key < a[ofs]. + */ + while (lastOfs < ofs) { + // size_t m = lastOfs + ((ofs - lastOfs) >> 1); + size_t m = (lastOfs & ofs) + ((lastOfs ^ ofs) >> 1); + + if (CMP(compare, carg, key, ELEM(base, m)) < 0) + ofs = m; // key < a[m] + else + lastOfs = m + 1; // a[m] <= key + } + assert(lastOfs == ofs); // so a[ofs - 1] <= key < a[ofs] + (void)width; + return ofs; +} + +/** + * Merges two adjacent runs in place, in a stable fashion. The first + * element of the first run must be greater than the first element of the + * second run (a[base1] > a[base2]), and the last element of the first run + * (a[base1 + len1-1]) must be greater than all elements of the second run. + * + * For performance, this method should be called only when len1 <= len2; + * its twin, mergeHi should be called if len1 >= len2. (Either method + * may be called if len1 == len2.) + * + * @param base1 first element in first run to be merged + * @param len1 length of first run to be merged (must be > 0) + * @param base2 first element in second run to be merged + * (must be aBase + aLen) + * @param len2 length of second run to be merged (must be > 0) + */ +static int NAME(mergeLo) (struct timsort * ts, void *base1, size_t len1, + void *base2, size_t len2, size_t width) { + + // Copy first run into temp array + void *tmp = ensureCapacity(ts, len1, width); + char *cursor1; + char *cursor2; + char *dest; + comparator compare; // Use local variable for performance +#ifdef IS_TIMSORT_R + void *carg; // Use local variable for performance +#endif + size_t minGallop; // " " " " " + + assert(len1 > 0 && len2 > 0 && ELEM(base1, len1) == base2); + if (!tmp) + return FAILURE; + + // System.arraycopy(a, base1, tmp, 0, len1); + memcpy(tmp, base1, LEN(len1)); // POP: can't overlap + + cursor1 = tmp; // Indexes into tmp array + cursor2 = base2; // Indexes int a + dest = base1; // Indexes int a + + // Move first element of second run and deal with degenerate cases + // a[dest++] = a[cursor2++]; + ASSIGN(dest, cursor2); + dest = INCPTR(dest); + cursor2 = INCPTR(cursor2); + + if (--len2 == 0) { + memcpy(dest, cursor1, LEN(len1)); // POP: can't overlap + return SUCCESS; + } + if (len1 == 1) { + memmove(dest, cursor2, LEN(len2)); // POP: overlaps + + // a[dest + len2] = tmp[cursor1]; // Last elt of run 1 to end of merge + ASSIGN(ELEM(dest, len2), cursor1); + return SUCCESS; + } + + compare = ts->c; // Use local variable for performance +#ifdef IS_TIMSORT_R + carg = ts->carg; // Use local variable for performance +#endif + minGallop = ts->minGallop; // " " " " " + + while (1) { + size_t count1 = 0; // Number of times in a row that first run won + size_t count2 = 0; // Number of times in a row that second run won + + /* + * Do the straightforward thing until (if ever) one run starts + * winning consistently. + */ + do { + assert(len1 > 1 && len2 > 0); + if (CMP(compare, carg, cursor2, cursor1) < 0) { + ASSIGN(dest, cursor2); + dest = INCPTR(dest); + cursor2 = INCPTR(cursor2); + count2++; + count1 = 0; + if (--len2 == 0) + goto outer; + if (count2 >= minGallop) + break; + } else { + ASSIGN(dest, cursor1); + dest = INCPTR(dest); + cursor1 = INCPTR(cursor1); + count1++; + count2 = 0; + if (--len1 == 1) + goto outer; + if (count1 >= minGallop) + break; + } + } while (1); // (count1 | count2) < minGallop); + + /* + * One run is winning so consistently that galloping may be a + * huge win. So try that, and continue galloping until (if ever) + * neither run appears to be winning consistently anymore. + */ + do { + assert(len1 > 1 && len2 > 0); + count1 = + CALL(gallopRight) (cursor2, cursor1, len1, 0, + CMPARGS(compare, carg), width); + if (count1 != 0) { + memcpy(dest, cursor1, LEN(count1)); // POP: can't overlap + dest = ELEM(dest, count1); + cursor1 = ELEM(cursor1, count1); + len1 -= count1; + if (len1 <= 1) // len1 == 1 || len1 == 0 + goto outer; + } + ASSIGN(dest, cursor2); + dest = INCPTR(dest); + cursor2 = INCPTR(cursor2); + if (--len2 == 0) + goto outer; + + count2 = + CALL(gallopLeft) (cursor1, cursor2, len2, 0, + CMPARGS(compare, carg), width); + if (count2 != 0) { + memmove(dest, cursor2, LEN(count2)); // POP: might overlap + dest = ELEM(dest, count2); + cursor2 = ELEM(cursor2, count2); + len2 -= count2; + if (len2 == 0) + goto outer; + } + ASSIGN(dest, cursor1); + dest = INCPTR(dest); + cursor1 = INCPTR(cursor1); + if (--len1 == 1) + goto outer; + if (minGallop > 0) + minGallop--; + } while (count1 >= MIN_GALLOP || count2 >= MIN_GALLOP); + minGallop += 2; // Penalize for leaving gallop mode + } // End of "outer" loop +outer: + ts->minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field + + if (len1 == 1) { + assert(len2 > 0); + memmove(dest, cursor2, LEN(len2)); // POP: might overlap + ASSIGN(ELEM(dest, len2), cursor1); // Last elt of run 1 to end of merge + + } else if (len1 == 0) { + errno = EINVAL; // Comparison method violates its general contract + return FAILURE; + } else { + assert(len2 == 0); + assert(len1 > 1); + memcpy(dest, cursor1, LEN(len1)); // POP: can't overlap + } + return SUCCESS; +} + +/** + * Like mergeLo, except that this method should be called only if + * len1 >= len2; mergeLo should be called if len1 <= len2. (Either method + * may be called if len1 == len2.) + * + * @param base1 first element in first run to be merged + * @param len1 length of first run to be merged (must be > 0) + * @param base2 first element in second run to be merged + * (must be aBase + aLen) + * @param len2 length of second run to be merged (must be > 0) + */ +static int NAME(mergeHi) (struct timsort * ts, void *base1, size_t len1, + void *base2, size_t len2, size_t width) { + + // Copy second run into temp array + void *tmp = ensureCapacity(ts, len2, width); + char *cursor1; // Indexes into a + char *cursor2; // Indexes into tmp array + char *dest; // Indexes into a + comparator compare; // Use local variable for performance +#ifdef IS_TIMSORT_R + void *carg; // " " " " " +#endif + size_t minGallop; // " " " " " + + assert(len1 > 0 && len2 > 0 && ELEM(base1, len1) == base2); + if (!tmp) + return FAILURE; + + memcpy(tmp, base2, LEN(len2)); // POP: can't overlap + + cursor1 = ELEM(base1, len1 - 1);// Indexes into a + cursor2 = ELEM(tmp, len2 - 1); // Indexes into tmp array + dest = ELEM(base2, len2 - 1); // Indexes into a + + // Move last element of first run and deal with degenerate cases + // a[dest--] = a[cursor1--]; + ASSIGN(dest, cursor1); + dest = DECPTR(dest); + cursor1 = DECPTR(cursor1); + if (--len1 == 0) { + memcpy(ELEM(dest, -(len2 - 1)), tmp, LEN(len2)); // POP: can't overlap + return SUCCESS; + } + if (len2 == 1) { + dest = ELEM(dest, -len1); + cursor1 = ELEM(cursor1, -len1); + memmove(ELEM(dest, 1), ELEM(cursor1, 1), LEN(len1)); // POP: overlaps + // a[dest] = tmp[cursor2]; + ASSIGN(dest, cursor2); + return SUCCESS; + } + + compare = ts->c; // Use local variable for performance +#ifdef IS_TIMSORT_R + carg = ts->carg; // Use local variable for performance +#endif + minGallop = ts->minGallop; // " " " " " + + while (1) { + size_t count1 = 0; // Number of times in a row that first run won + size_t count2 = 0; // Number of times in a row that second run won + + /* + * Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + do { + assert(len1 > 0 && len2 > 1); + if (CMP(compare, carg, cursor2, cursor1) < 0) { + ASSIGN(dest, cursor1); + dest = DECPTR(dest); + cursor1 = DECPTR(cursor1); + count1++; + count2 = 0; + if (--len1 == 0) + goto outer; + } else { + ASSIGN(dest, cursor2); + dest = DECPTR(dest); + cursor2 = DECPTR(cursor2); + count2++; + count1 = 0; + if (--len2 == 1) + goto outer; + } + } while ((count1 | count2) < minGallop); + + /* + * One run is winning so consistently that galloping may be a + * huge win. So try that, and continue galloping until (if ever) + * neither run appears to be winning consistently anymore. + */ + do { + assert(len1 > 0 && len2 > 1); + count1 = + len1 - CALL(gallopRight) (cursor2, base1, + len1, len1 - 1, + CMPARGS(compare, carg), + width); + if (count1 != 0) { + dest = ELEM(dest, -count1); + cursor1 = ELEM(cursor1, -count1); + len1 -= count1; + memmove(INCPTR(dest), INCPTR(cursor1), + LEN(count1)); // POP: might overlap + if (len1 == 0) + goto outer; + } + ASSIGN(dest, cursor2); + dest = DECPTR(dest); + cursor2 = DECPTR(cursor2); + if (--len2 == 1) + goto outer; + + count2 = + len2 - CALL(gallopLeft) (cursor1, tmp, len2, + len2 - 1, CMPARGS(compare, carg), + width); + if (count2 != 0) { + dest = ELEM(dest, -count2); + cursor2 = ELEM(cursor2, -count2); + len2 -= count2; + memcpy(INCPTR(dest), INCPTR(cursor2), LEN(count2)); // POP: can't overlap + if (len2 <= 1) // len2 == 1 || len2 == 0 + goto outer; + } + ASSIGN(dest, cursor1); + dest = DECPTR(dest); + cursor1 = DECPTR(cursor1); + if (--len1 == 0) + goto outer; + if (minGallop > 0) + minGallop--; + } while (count1 >= MIN_GALLOP || count2 >= MIN_GALLOP); + minGallop += 2; // Penalize for leaving gallop mode + } // End of "outer" loop +outer: + ts->minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field + + if (len2 == 1) { + assert(len1 > 0); + dest = ELEM(dest, -len1); + cursor1 = ELEM(cursor1, -len1); + memmove(INCPTR(dest), INCPTR(cursor1), LEN(len1)); // POP: might overlap + // a[dest] = tmp[cursor2]; // Move first elt of run2 to front of merge + ASSIGN(dest, cursor2); + } else if (len2 == 0) { + errno = EINVAL; // Comparison method violates its general contract + return FAILURE; + } else { + assert(len1 == 0); + assert(len2 > 0); + memcpy(ELEM(dest, -(len2 - 1)), tmp, LEN(len2)); // POP: can't overlap + } + + return SUCCESS; +} diff --git a/configure.ac b/configure.ac index 1a17aa904da4d..d7e29d9de5ea2 100644 --- a/configure.ac +++ b/configure.ac @@ -1455,7 +1455,8 @@ PHP_ADD_SOURCES(Zend, \ zend_iterators.c zend_interfaces.c zend_exceptions.c zend_strtod.c zend_gc.c \ zend_closures.c zend_weakrefs.c zend_float.c zend_string.c zend_signal.c zend_generators.c \ zend_virtual_cwd.c zend_ast.c zend_objects.c zend_object_handlers.c zend_objects_API.c \ - zend_default_classes.c zend_inheritance.c zend_smart_str.c zend_cpuinfo.c zend_gdb.c, \ + zend_default_classes.c zend_inheritance.c zend_smart_str.c zend_cpuinfo.c zend_gdb.c \ + zend_timsort.c, \ -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) dnl Selectively disable optimization due to high RAM usage during compiling the