From 0e181024c00b746a930aab6a0cfd9162d7b67ae4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?P=C3=A1draig=20Brady?= Date: Sat, 18 Dec 2010 05:27:46 +0000 Subject: [PATCH] sort: use at most 8 threads by default * src/sort.c (main): If --parallel isn't specified, restrict the number of threads to 8 by default. If the --parallel option is specified, then allow any number of threads to be set, independent of the number of processors on the system. * doc/coreutils.texi (sort invocation): Document the changes to determining the number of threads to use. Mention the memory overhead when using multiple threads. * tests/misc/sort-spinlock-abuse: Allow single core systems that support pthreads. * tests/misc/sort-stale-thread-mem: Likewise. * tests/misc/sort-unique-segv: Likewise. * NEWS: Mention the change in behaviour. --- NEWS | 6 ++++++ doc/coreutils.texi | 9 +++++---- src/sort.c | 17 +++++++++++------ tests/misc/sort-spinlock-abuse | 3 ++- tests/misc/sort-stale-thread-mem | 3 ++- tests/misc/sort-unique-segv | 3 ++- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index 484ed5c..7eda1b2 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,12 @@ GNU coreutils NEWS -*- outline -*- sort -m -o f f ... f no longer dumps core when file descriptors are limited. +** Changes in behavior + + sort will not create more than 8 threads by default due to diminishing + performance gains. Also the --parallel option is no longer restricted + to the number of available processors. + ** New features split accepts the --number option to generate a specific number of files. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 00a5575..a74f645 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -4190,10 +4190,11 @@ disks and controllers. @item --parallel=@var{n} @opindex --parallel @cindex multithreaded sort -Limit the number of sorts run in parallel to @var{n}. 
By default, -@var{n} is set to the number of available processors, and values -greater than that are reduced to that limit. Also see -@ref{nproc invocation}. +Set the number of sorts run in parallel to @var{n}. By default, +@var{n} is set to the number of available processors, but limited +to 8, as there are diminishing performance gains after that. +Note also that using @var{n} threads increases the memory usage by +a factor of log @var{n}. Also see @ref{nproc invocation}. @item -u @itemx --unique diff --git a/src/sort.c b/src/sort.c index 54dd815..6cc0588 100644 --- a/src/sort.c +++ b/src/sort.c @@ -116,6 +116,10 @@ struct rlimit { size_t rlim_cur; }; this number has any practical effect. */ enum { SUBTHREAD_LINES_HEURISTIC = 4 }; +/* The number of threads after which there are + diminishing performance gains. */ +enum { DEFAULT_MAX_THREADS = 8 }; + /* Exit statuses. */ enum { @@ -455,7 +459,7 @@ Other options:\n\ -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ multiple options specify multiple directories\n\ - --parallel=N limit the number of sorts run concurrently to N\n\ + --parallel=N change the number of sorts run concurrently to N\n\ -u, --unique with -c, check for strict ordering;\n\ without -c, output only the first of an equal run\n\ "), DEFAULT_TMPDIR); @@ -4595,14 +4599,15 @@ main (int argc, char **argv) } else { - unsigned long int np2 = num_processors (NPROC_CURRENT_OVERRIDABLE); - if (!nthreads || nthreads > np2) - nthreads = np2; + if (!nthreads) + { + nthreads = MIN (DEFAULT_MAX_THREADS, + num_processors (NPROC_CURRENT_OVERRIDABLE)); + } /* Avoid integer overflow later. 
*/ size_t nthreads_max = SIZE_MAX / (2 * sizeof (struct merge_node)); - if (nthreads_max < nthreads) - nthreads = nthreads_max; + nthreads = MIN (nthreads, nthreads_max); sort (files, nfiles, outfile, nthreads); } diff --git a/tests/misc/sort-spinlock-abuse b/tests/misc/sort-spinlock-abuse index bbf5753..67ea895 100755 --- a/tests/misc/sort-spinlock-abuse +++ b/tests/misc/sort-spinlock-abuse @@ -20,7 +20,8 @@ . "${srcdir=.}/init.sh"; path_prepend_ ../src print_ver_ sort -test "$(nproc)" = 1 && skip_ "requires a multi-core system" +grep '^#define HAVE_PTHREAD_T 1' "$CONFIG_HEADER" > /dev/null || + skip_ 'requires pthreads' seq 100000 > in || framework_failure_ mkfifo fifo || framework_failure_ diff --git a/tests/misc/sort-stale-thread-mem b/tests/misc/sort-stale-thread-mem index 2955e22..1f408d4 100755 --- a/tests/misc/sort-stale-thread-mem +++ b/tests/misc/sort-stale-thread-mem @@ -24,7 +24,8 @@ print_ver_ sort very_expensive_ valgrind --help >/dev/null || skip_ "requires valgrind" -test "$(nproc)" = 1 && skip_ "requires a multi-core system" +grep '^#define HAVE_PTHREAD_T 1' "$CONFIG_HEADER" > /dev/null || + skip_ 'requires pthreads' # gensort output seems to trigger the failure more often, # so prefer gensort if it is available. diff --git a/tests/misc/sort-unique-segv b/tests/misc/sort-unique-segv index 55a7414..c4854f9 100755 --- a/tests/misc/sort-unique-segv +++ b/tests/misc/sort-unique-segv @@ -19,7 +19,8 @@ . "${srcdir=.}/init.sh"; path_prepend_ ../src print_ver_ sort -test "$(nproc)" = 1 && skip_ "requires a multi-core system" +grep '^#define HAVE_PTHREAD_T 1' "$CONFIG_HEADER" > /dev/null || + skip_ 'requires pthreads' cat <<\EOF > in || framework_failure_ -- 2.7.4