diff --git a/Makefile b/Makefile index 8735a36..646639f 100644 --- a/Makefile +++ b/Makefile @@ -28,4 +28,4 @@ benchmark: benchmark.c sort.h $(CC) $(CFLAGS) benchmark.c -o benchmark format: - astyle --options=astyle.options sort.h bitonic_sort.h demo.c multidemo.c stresstest.c benchmark.c \ No newline at end of file + astyle --options=astyle.options sort.h demo.c multidemo.c stresstest.c benchmark.c \ No newline at end of file diff --git a/sort.h b/sort.h index d9c199a..6ed77cf 100644 --- a/sort.h +++ b/sort.h @@ -1089,7 +1089,7 @@ void MERGE_SORT_IN_PLACE(SORT_TYPE *dst, const size_t len) { q = 2; while ((p & q) == 0) { - if (SORT_CMP(dst1[1 - q], dst1[-q]) < 0) { + if (SORT_CMP(dst1[1 - q], dst1[-(int) q]) < 0) { break; } @@ -1196,8 +1196,6 @@ void MERGE_SORT(SORT_TYPE *dst, const size_t size) { } -/* Quick sort: based on wikipedia */ - static __inline size_t QUICK_SORT_PARTITION(SORT_TYPE *dst, const size_t left, const size_t right, const size_t pivot) { SORT_TYPE value = dst[pivot]; @@ -1228,6 +1226,38 @@ static __inline size_t QUICK_SORT_PARTITION(SORT_TYPE *dst, const size_t left, return index; } +/* Based on Knuth vol. 3 +static __inline size_t QUICK_SORT_HOARE_PARTITION(SORT_TYPE *dst, const size_t l, + const size_t r, const size_t pivot) { + SORT_TYPE value; + size_t i = l + 1; + size_t j = r; + + if (pivot != l) { + SORT_SWAP(dst[pivot], dst[l]); + } + value = dst[l]; + + while (1) { + while (SORT_CMP(dst[i], value) < 0) { + i++; + } + while (SORT_CMP(value, dst[j]) < 0) { + j--; + } + if (j <= i) { + SORT_SWAP(dst[l], dst[j]); + return j; + } + SORT_SWAP(dst[i], dst[j]); + i++; + j--; + } + return 0; +} +*/ + + /* Return the median index of the objects at the three indices. */ static __inline size_t MEDIAN(const SORT_TYPE *dst, const size_t a, const size_t b, const size_t c) { @@ -1274,31 +1304,60 @@ static __inline size_t MEDIAN(const SORT_TYPE *dst, const size_t a, const size_t } } -static void QUICK_SORT_RECURSIVE(SORT_TYPE *dst, const size_t left, const size_t right) { +static void QUICK_SORT_RECURSIVE(SORT_TYPE *dst, const size_t original_left, + const size_t original_right) { + size_t left; + size_t right; size_t pivot; size_t new_pivot; + size_t middle; + int loop_count = 0; + const int max_loops = 64 - CLZ(original_right - original_left); /* ~lg N */ + left = original_left; + right = original_right; - if (right <= left) { - return; - } + while (1) { + if (right <= left) { + return; + } - if ((right - left + 1U) <= SMALL_SORT_BND) { - SMALL_SORT(&dst[left], right - left + 1U); - return; - } + if ((right - left + 1U) <= SMALL_SORT_BND) { + SMALL_SORT(&dst[left], right - left + 1U); + return; + } + + if (++loop_count >= max_loops) { + /* we have recursed / looped too many times; switch to heap sort */ + HEAP_SORT(&dst[left], right - left + 1U); + return; + } - pivot = left + ((right - left) >> 1); - /* this seems to perform worse by a small amount... ? */ - /* pivot = MEDIAN(dst, left, pivot, right); */ - new_pivot = QUICK_SORT_PARTITION(dst, left, right, pivot); + /* median of 5 */ + middle = left + ((right - left) >> 1); + pivot = MEDIAN((const SORT_TYPE *) dst, left, middle, right); + pivot = MEDIAN((const SORT_TYPE *) dst, left + ((middle - left) >> 1), pivot, + middle + ((right - middle) >> 1)); + new_pivot = QUICK_SORT_PARTITION(dst, left, right, pivot); - /* check for partition all equal */ - if (new_pivot == SIZE_MAX) { - return; - } + /* check for partition all equal */ + if (new_pivot == SIZE_MAX) { + return; + } - QUICK_SORT_RECURSIVE(dst, left, new_pivot - 1U); - QUICK_SORT_RECURSIVE(dst, new_pivot + 1U, right); + /* recurse only on the small part to avoid degenerate stack sizes */ + /* and manually do tail call on the large part */ + if (new_pivot - 1U - left > right - new_pivot - 1U) { + /* left is bigger than right */ + QUICK_SORT_RECURSIVE(dst, new_pivot + 1U, right); + /* tail call for left */ + right = new_pivot - 1U; + } else { + /* right is bigger than left */ + QUICK_SORT_RECURSIVE(dst, left, new_pivot - 1U); + /* tail call for right */ + left = new_pivot + 1U; + } + } } void QUICK_SORT(SORT_TYPE *dst, const size_t size) { diff --git a/stresstest.c b/stresstest.c index c944eef..7b11afb 100644 --- a/stresstest.c +++ b/stresstest.c @@ -16,7 +16,7 @@ /* Used to control the stress test */ #define SEED 123 -#define MAXSIZE 1600 +#define MAXSIZE 45000 #define TESTS 1000 #define RAND_RANGE(__n, __min, __max) \ @@ -31,6 +31,7 @@ enum { FILL_SORTED_10000, FILL_SWAPPED_N2, FILL_SWAPPED_N8, + FILL_EVIL, FILL_LAST_ELEMENT }; @@ -42,7 +43,8 @@ char *test_names[FILL_LAST_ELEMENT] = { "sorted blocks of length 100", "sorted blocks of length 10000", "swapped size/2 pairs", - "swapped size/8 pairs" + "swapped size/8 pairs", + "known evil data" }; /* used for stdlib */ @@ -126,6 +128,14 @@ static void fill_swapped(int64_t *dst, const int size, const int swapped_cnt) { dst[ind2] = tmp; } +static void fill_evil(int64_t *dst, const int size) { + int i; + + for (i = 0; i < size; i++) { + dst[i] = i ^ 1; + } +} + static void fill(int64_t *dst, const int size, int type) { switch (type) { case FILL_SORTED: @@ -156,6 +166,10 @@ static void fill(int64_t *dst, const int size, int type) { fill_same(dst, size); break; + case FILL_EVIL: + fill_evil(dst, size); + break; + case FILL_RANDOM: default: fill_random(dst, size); @@ -351,6 +365,8 @@ int main(void) { RAND_RANGE(sizes[i], 0, MAXSIZE); } + sizes[TESTS - 1] = 45000; + for (i = 0; i < FILL_LAST_ELEMENT; i++) { int result = run_tests(sizes, TESTS, i);