1 /* Test and benchmark of a couple of parallel sorting algorithms.
2 Copyright (C) 2008-2013 Free Software Foundation, Inc.
4 GCC is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3, or (at your option) any later
9 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
/* Print the wall-clock time taken by the sort called NAME and check its
   result.  STIME is the omp_get_wtime () value the caller sampled just
   before starting the sort; ARRAY holds COUNT ints that are expected to
   be in non-decreasing order.  */
30 verify (const char *name, double stime, int *array, int count)
/* The end time is sampled first, so the scan below is not part of the
   reported interval.  */
33 double etime = omp_get_wtime ();
35 printf ("%s: %g\n", name, etime - stime);
/* Any element smaller than its predecessor means the sort failed.
   NOTE(review): whatever follows the diagnostic below is not visible in
   this listing.  */
36 for (i = 1; i < count; i++)
37 if (array[i] < array[i - 1])
39 printf ("%s: incorrectly sorted\n", name);
/* Sort the inclusive index range ARRAY[S..E] in place by insertion.
   sort1, sort2_1 and sort3_1 call this for ranges narrower than
   THRESHOLD.  */
45 insertsort (int *array, int s, int e)
/* Element S on its own is trivially ordered, so the scan starts at
   S + 1 and includes E.  */
48 for (i = s + 1; i <= e; i++)
/* J is decremented before ARRAY[J] is read, so each pass compares VAL
   with the next element to the left and moves that element up one slot
   while it is greater than VAL, never going below S.
   NOTE(review): the statements that set VAL and J, and the final store
   of VAL, are not visible in this listing.  */
52 while (j-- > s && val < array[j])
53 array[j + 1] = array[j];
/* Capacity of ARR in entries: four times the number of bits in an int.
   NOTE(review): the replacement text is not wrapped in parentheses.
   That is harmless for the two visible uses (the array bound below and
   `size < STACK_SIZE' in sort1) but would misparse next to a
   higher-precedence operator, e.g. `x % STACK_SIZE'.  */
67 #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
/* Backing storage for the stacked pairs; init_int_pair_stack points TOP
   at its first element.  */
68 struct int_pair arr[STACK_SIZE];
/* Make STACK empty: TOP is set to the first slot of ARR, so
   size_int_pair_stack returns 0 afterwards.  */
72 init_int_pair_stack (struct int_pair_stack *stack)
74 stack->top = &stack->arr[0];
/* Push the index pair (LO, HI) onto STACK.
   NOTE(review): the body is not visible in this listing.  sort1 uses
   this to queue [next_lo, next_hi] ranges on both its per-thread stack
   and its shared stack, and checks `size < STACK_SIZE' itself only
   before the shared-stack push.  */
78 push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
/* Remove a pair from STACK and hand it back through *LO and *HI.
   NOTE(review): the body is not visible in this listing.  The visible
   callers in sort1 pop the shared stack after polling its size, and pop
   the per-thread stack on a path whose guard is not shown.  */
86 pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
/* Return how many entries lie between the start of ARR and TOP, i.e.
   0 right after init_int_pair_stack.  */
94 size_int_pair_stack (struct int_pair_stack *stack)
96 return stack->top - &stack->arr[0];
/* Architecture-specific pause/barrier hint.  NOTE(review): the header of
   the enclosing function is not visible in this listing.  */
102 #if defined __i386__ || defined __x86_64__
/* x86: GCC builtin for the PAUSE instruction.  */
103 __builtin_ia32_pause ();
104 #elif defined __ia64__
/* Itanium: "hint @pause"; the "memory" clobber keeps the compiler from
   carrying cached memory values across the statement.  */
105 __asm volatile ("hint @pause" : : : "memory");
106 #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
/* 64-bit / V9 SPARC: a LoadLoad memory barrier, again with a "memory"
   clobber.  */
107 __asm volatile ("membar #LoadLoad" : : : "memory");
/* NOTE(review): the directive that introduces this last branch
   (presumably #else) and the closing #endif are not visible here.  The
   empty asm emits no instruction and acts only as a compiler-level
   memory barrier.  */
109 __asm volatile ("" : : : "memory");
/* Helper taking ARRAY and two indices A and B.  NOTE(review): the body
   is not visible in this listing; judging by the name and by its use in
   choose_pivot and partition it presumably exchanges ARRAY[A] and
   ARRAY[B] -- confirm against the full file.  */
114 swap (int *array, int a, int b)
/* Pick a pivot for the inclusive range ARRAY[LO..HI] by putting the
   elements at LO, the midpoint and HI in order relative to each other.
   NOTE(review): the return statement is not visible in this listing;
   partition compares the result against array elements, so it is
   presumably the middle value rather than an index.  */
122 choose_pivot (int *array, int lo, int hi)
/* NOTE(review): lo + hi is computed in int and can overflow for very
   large indices; lo + (hi - lo) / 2 would avoid that.  */
124 int mid = (lo + hi) / 2;
/* First make sure ARRAY[LO] <= ARRAY[MID].  */
126 if (array[mid] < array[lo])
127 swap (array, lo, mid);
/* Then move the larger of ARRAY[MID] and ARRAY[HI] to HI ...  */
128 if (array[hi] < array[mid])
130 swap (array, mid, hi);
/* ... and re-check the LO/MID pair, since MID may have just received a
   smaller value.  */
131 if (array[mid] < array[lo])
132 swap (array, lo, mid);
/* Partition the inclusive range ARRAY[LO..HI] around the value chosen by
   choose_pivot.  The callers treat the result MID as a split point and
   go on to sort [LO, MID - 1] and [MID, HI] separately.
   NOTE(review): the initialisation of LEFT and RIGHT, the enclosing
   loop, its exit test and the return statement are not visible in this
   listing.  */
138 partition (int *array, int lo, int hi)
140 int pivot = choose_pivot (array, lo, hi);
/* Advance LEFT past elements smaller than the pivot.  The index is
   incremented before each read, and the loop body is empty.  */
146 while (array[++left] < pivot);
/* Move RIGHT down past elements larger than the pivot, decrementing
   before each read.  */
147 while (array[--right] > pivot);
/* Exchange the two elements the scans stopped on.  */
150 swap (array, left, right);
/* Sort ARRAY (COUNT ints) with a team of OpenMP threads.  Each thread
   partitions index ranges and keeps pending ranges on a stack of its
   own; a second stack, shared by the team and guarded by an OpenMP lock,
   is used to pass ranges between threads.
   NOTE(review): many lines of this function (loop frame, else branches,
   termination logic) are not visible in this listing, so the comments
   below describe only the visible statements.  */
156 sort1 (int *array, int count)
/* Declared outside the parallel region, so there is one instance for
   the whole team.  */
159 struct int_pair_stack global_stack;
163 omp_init_lock (&lock);
164 init_int_pair_stack (&global_stack);
/* ARRAY and COUNT are copied into every thread.  Nothing else is listed
   in a data-sharing clause, so variables declared above the region keep
   the default (shared) attribute.  */
165 #pragma omp parallel firstprivate (array, count)
/* Declared inside the region: every thread has its own copies.  */
167 int lo = 0, hi = 0, mid, next_lo, next_hi;
169 struct int_pair_stack local_stack;
171 init_int_pair_stack (&local_stack);
/* Thread 0 records the team size, which is used further down to limit
   how many ranges are parked on the shared stack.  */
172 if (omp_get_thread_num () == 0)
174 num_threads = omp_get_num_threads ();
/* A range narrower than THRESHOLD is finished with insertion sort.  */
181 if (hi - lo < THRESHOLD)
183 insertsort (array, lo, hi);
/* This thread's own stack is empty: look at the shared stack, which is
   inspected and popped with LOCK held.  */
188 if (size_int_pair_stack (&local_stack) == 0)
191 omp_set_lock (&lock);
192 if (size_int_pair_stack (&global_stack) == 0)
198 omp_unset_lock (&lock);
201 omp_unset_lock (&lock);
/* Poll the shared stack size.  NOTE(review): the rest of this loop
   condition and the loop body are not visible in this listing.  */
203 while (size_int_pair_stack (&global_stack) == 0
/* Take a range from the shared stack and release the lock.  */
210 pop_int_pair_stack (&global_stack, &lo, &hi);
211 omp_unset_lock (&lock);
/* Take a range from this thread's own stack (the guard selecting this
   path is not visible).  */
215 pop_int_pair_stack (&local_stack, &lo, &hi);
/* Split [lo, hi] at MID and compare the widths of the two sides.
   NOTE(review): the assignments that follow (to lo, hi, next_lo and
   next_hi) are not visible in this listing.  */
218 mid = partition (array, lo, hi);
219 if (mid - lo < hi - mid)
/* The [next_lo, next_hi] range is sorted right away when it is narrower
   than THRESHOLD.  */
232 if (next_hi - next_lo < THRESHOLD)
233 insertsort (array, next_lo, next_hi);
/* Read the shared stack size without holding the lock; it is read again
   below once the lock is held, before anything is pushed.  */
236 if (size_int_pair_stack (&global_stack) < num_threads - 1)
240 omp_set_lock (&lock);
241 size = size_int_pair_stack (&global_stack);
/* Push to the shared stack only while fewer than num_threads - 1 ranges
   are queued there and ARR still has room.  The push to LOCAL_STACK
   that follows is presumably the else branch (the `else' line is not
   visible).  */
242 if (size < num_threads - 1 && size < STACK_SIZE)
243 push_int_pair_stack (&global_stack, next_lo, next_hi);
245 push_int_pair_stack (&local_stack, next_lo, next_hi);
246 omp_unset_lock (&lock);
/* Keep the range on this thread's own stack (presumably the path taken
   when the unlocked size check above fails).  */
249 push_int_pair_stack (&local_stack, next_lo, next_hi);
/* Release the lock object initialised at the top of the function.  */
253 omp_destroy_lock (&lock);
/* Recursive helper for sort2: sort the inclusive range ARRAY[LO..HI].
   *BUSY is compared against NUM_THREADS to decide between plain
   recursion and a nested two-thread parallel region.
   NOTE(review): the statements that update *BUSY are not visible in
   this listing.  */
257 sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
/* A range narrower than THRESHOLD is finished with insertion sort.  */
261 if (hi - lo < THRESHOLD)
263 insertsort (array, lo, hi);
267 mid = partition (array, lo, hi);
/* *BUSY has reached NUM_THREADS: recurse on both halves in the calling
   thread.  */
269 if (*busy >= num_threads)
271 sort2_1 (array, lo, mid - 1, num_threads, busy);
272 sort2_1 (array, mid, hi, num_threads, busy);
/* Nested team of two threads; every listed variable is copied into each
   of them (BUSY is a pointer, so both copies refer to the same
   counter).  */
279 #pragma omp parallel num_threads (2) \
280 firstprivate (array, lo, hi, mid, num_threads, busy)
/* Thread 0 takes [lo, mid - 1].  The remaining call handles [mid, hi];
   its guard is not visible, presumably the else branch.  */
282 if (omp_get_thread_num () == 0)
283 sort2_1 (array, lo, mid - 1, num_threads, busy);
286 sort2_1 (array, mid, hi, num_threads, busy);
/* Sort ARRAY (COUNT ints) through the recursive helper sort2_1.  */
294 sort2 (int *array, int count)
/* One thread of the current team records the team size; `nowait' drops
   the barrier at the end of the single construct.  NOTE(review): the
   enclosing parallel construct is not visible in this listing.  */
300 #pragma omp single nowait
301 num_threads = omp_get_num_threads ();
/* Start on the whole array, as an inclusive index range.  */
303 sort2_1 (array, 0, count - 1, num_threads, &busy);
/* Only compiled when the implementation reports OpenMP 3.0 (200805) or
   later.  */
306 #if _OPENMP >= 200805
/* Recursive helper for sort3: sort the inclusive range ARRAY[LO..HI].
   NOTE(review): any OpenMP directives placed between the visible lines
   are missing from this listing.  */
308 sort3_1 (int *array, int lo, int hi)
/* A range narrower than THRESHOLD is finished with insertion sort.  */
312 if (hi - lo < THRESHOLD)
314 insertsort (array, lo, hi);
318 mid = partition (array, lo, hi);
/* Recurse on both sides of the split point.  */
320 sort3_1 (array, lo, mid - 1);
321 sort3_1 (array, mid, hi);
/* Sort ARRAY (COUNT ints) through the recursive helper sort3_1,
   starting on the whole array as an inclusive index range.  */
325 sort3 (int *array, int count)
329 sort3_1 (array, 0, count - 1);
/* Benchmark driver: fill a buffer with COUNT pseudo-random ints, then
   run and time each sort on a fresh copy and check it with verify.
   COUNT defaults to 1000000 and can be overridden through argv[1].  */
334 main (int argc, char **argv)
336 int i, count = 1000000;
338 int *unsorted, *sorted, num_threads;
/* Base 0 lets strtoul accept decimal, octal (leading 0) and hexadecimal
   (leading 0x) input.
   NOTE(review): no argc check is visible in this listing, and the
   unsigned long result is narrowed to int with no range or error
   checking.  */
340 count = strtoul (argv[1], NULL, 0);
/* NOTE(review): count * sizeof (int) is not checked for overflow.  */
342 unsorted = malloc (count * sizeof (int));
343 sorted = malloc (count * sizeof (int));
344 if (unsorted == NULL || sorted == NULL)
346 puts ("allocation failure");
/* Generate the input once; every sort below works on a copy of it.  */
351 for (i = 0; i < count; i++)
352 unsorted[i] = rand ();
/* One thread of the current team records the team size.
   NOTE(review): the enclosing parallel construct is not visible in this
   listing.  */
357 #pragma omp single nowait
358 num_threads = omp_get_num_threads ();
359 printf ("Threads: %d\n", num_threads);
/* Each run starts from a fresh copy of the same unsorted data and takes
   its start time immediately before calling the sort.  */
361 memcpy (sorted, unsorted, count * sizeof (int));
362 stime = omp_get_wtime ();
363 sort1 (sorted, count);
364 verify ("sort1", stime, sorted, count);
366 memcpy (sorted, unsorted, count * sizeof (int));
367 stime = omp_get_wtime ();
368 sort2 (sorted, count);
369 verify ("sort2", stime, sorted, count);
/* sort3 is run only under the same _OPENMP >= 200805 condition that
   guards the definition of sort3_1.  */
371 #if _OPENMP >= 200805
372 memcpy (sorted, unsorted, count * sizeof (int));
373 stime = omp_get_wtime ();
374 sort3 (sorted, count);
375 verify ("sort3", stime, sorted, count);