2 # Copyright Steven J. Ross 2008 - 2014
\r
3 # Distributed under the Boost Software License, Version 1.0.
\r
4 # (See accompanying file LICENSE_1_0.txt or copy at
\r
5 # http://www.boost.org/LICENSE_1_0.txt)
\r
7 # See http://www.boost.org/libs/sort for library home page.
\r
9 # A speed and accuracy testing and automated parameter tuning script.
\r
11 $usage = "usage: tune.pl [-tune] [-real] [-tune_verify] [-verbose] [-multiple_iterations] [-large] [-small] [-windows] [fileSize]\n";
\r
12 # testing sorting on 40 million elements by default
\r
13 # don't test on below 2^22 (4 million) elements as that is the minimum
\r
14 # for max_splits of 11 to be efficient
\r
16 $defFileSize = 5000000;
\r
21 $exename = "spreadsort";
\r
22 $makename = "../../b2 \-\-tune";
\r
26 $log = "> .tunelog";
\r
27 $log2 = "> .tunelog 2>&1";
\r
30 # have to change the path for UNIX
\r
31 $prev_path = $ENV{'PATH'};
\r
32 $ENV{'PATH'} = '.:'.$prev_path;
\r
34 for (my $ii = 0; $ii < @ARGV; $ii++) {
\r
35 my $currArg = $ARGV[$ii];
\r
36 if ($currArg =~ /^-help$/) {
\r
37 print STDERR $usage;
\r
40 # verification roughly doubles the runtime of this script,
\r
41 # but it does make sure that results are correct during tuning
\r
42 # verification always runs during speed comparisons with std::sort
\r
43 if ($currArg =~ /^-tune_verify$/) {
\r
45 # use real times only, don't use weighting and special-case tests
\r
46 # this saves about 5/6 of the script runtime but results are different
\r
47 } elsif ($currArg =~ /^-real$/) {
\r
49 } elsif ($currArg =~ /^-verbose$/) {
\r
51 # runs until we converge on a precise set of values
\r
52 # defaults off because of runtime
\r
53 } elsif ($currArg =~ /^-multiple_iterations$/) {
\r
55 } elsif ($currArg =~ /^-debug$/) {
\r
59 } elsif ($currArg =~ /^-large$/) {
\r
60 $defFileSize = 20000000;
\r
61 } elsif ($currArg =~ /^-small$/) {
\r
62 $defFileSize = 100000;
\r
63 } elsif ($currArg =~ /^-tune$/) {
\r
65 } elsif ($currArg =~ /^-windows$/) {
\r
66 $makename = "..\\..\\".$makename;
\r
67 } elsif ($currArg =~ /^-/) {
\r
68 print STDERR $usage;
\r
71 $defFileSize = $currArg;
\r
74 $fileSize = $defFileSize;
\r
76 print STDOUT "Tuning variables for $exename on vectors with $defFileSize elements\n";
\r
78 # these are reasonable values
\r
80 $log_finishing_count = 31;
\r
82 $log_mean_bin_size = 2;
\r
83 $float_log_min_size = 10;
\r
84 $float_log_mean_bin_size = 2;
\r
85 $float_log_finishing_count = 4;
\r
87 # this value is a minimum to obtain decent file I/O performance
\r
88 $min_sort_size = 1000;
\r
91 print STDOUT "building randomgen\n";
\r
92 system("$makename randomgen $log");
\r
93 # Tuning to get convergence, maximum of 4 iterations with multiple iterations
\r
98 for ($ii = 0; $changed and $ii < $iter_count; $ii++) {
\r
100 # Tuning max_splits is not recommended.
\r
101 #print STDOUT "Tuning max_splits\n";
\r
102 #TuneVariable(\$max_splits, $log_min_size - $log_mean_bin_size, 17);
\r
103 print STDOUT "Tuning log of the minimum count for recursion\n";
\r
104 TuneVariable(\$log_min_size, $log_mean_bin_size + 1, $max_splits + $log_mean_bin_size);
\r
105 print STDOUT "Tuning log_mean_bin_size\n";
\r
106 TuneVariable(\$log_mean_bin_size, 0, $log_min_size - 1);
\r
107 print STDOUT "Tuning log_finishing_size\n";
\r
108 TuneVariable(\$log_finishing_count, 1, $log_min_size);
\r
109 # tuning variables for floats
\r
110 $exename = "floatsort";
\r
111 print STDOUT "Tuning log of the minimum count for recursion for floats\n";
\r
112 TuneVariable(\$float_log_min_size, $float_log_mean_bin_size + 1, $max_splits + $float_log_mean_bin_size);
\r
113 print STDOUT "Tuning float_log_mean_bin_size\n";
\r
114 TuneVariable(\$float_log_mean_bin_size, 0, $float_log_min_size - 1);
\r
115 print STDOUT "Tuning float_log_finishing_size\n";
\r
116 TuneVariable(\$float_log_finishing_count, 1, $float_log_min_size);
\r
117 $exename = "spreadsort";
\r
120 # After optimizations for large datasets are complete, see how small of a
\r
121 # dataset can be sped up
\r
122 print STDOUT "Tuning minimum sorting size\n";
\r
124 print STDOUT "Writing results\n";
\r
127 # Doing a final run with final settings to compare sort times
\r
128 # also verifying correctness of results
\r
129 $verifycorrect = 1;
\r
131 $fileSize = $defFileSize;
\r
132 system("$makename $all $log");
\r
134 PerfTest("Verifying integer_sort", "spreadsort");
\r
135 PerfTest("Verifying float_sort", "floatsort");
\r
136 PerfTest("Verifying string_sort", "stringsort");
\r
137 PerfTest("Verifying integer_sort with mostly-sorted data", "mostlysorted");
\r
138 PerfTest("Timing integer_sort on already-sorted data", "alreadysorted");
\r
139 PerfTest("Verifying integer_sort with rightshift", "rightshift");
\r
140 PerfTest("Verifying integer_sort with 64-bit integers", "int64");
\r
141 PerfTest("Verifying integer_sort with separate key and data", "keyplusdata");
\r
142 PerfTest("Verifying reverse integer_sort", "reverseintsort");
\r
143 PerfTest("Verifying float_sort with doubles", "double");
\r
144 PerfTest("Verifying float_sort with shift functor", "shiftfloatsort");
\r
145 PerfTest("Verifying float_sort with functors", "floatfunctorsort");
\r
146 PerfTest("Verifying string_sort with indexing functors", "charstringsort");
\r
147 PerfTest("Verifying string_sort with all functors", "stringfunctorsort");
\r
148 PerfTest("Verifying reverse_string_sort", "reversestringsort");
\r
149 PerfTest("Verifying reverse_string_sort with functors",
\r
150 "reversestringfunctorsort");
\r
151 PerfTest("Verifying generalized string_sort with multiple keys of different types",
\r
152 "generalizedstruct");
\r
153 PerfTest("Verifying boost::sort on its custom-built worst-case distribution",
\r
154 "binaryalrbreaker");
\r
155 # clean up once we finish
\r
156 system("$makename clean $log");
\r
158 system("del spread_sort_out.txt $log2");
\r
159 system("del standard_sort_out.txt $log2");
\r
160 system("del input.txt $log2");
\r
161 system("del *.rsp $log2");
\r
162 system("del *.manifest $log2");
\r
163 system("del time.txt $log2");
\r
165 system("rm -f time.txt $log2");
\r
166 system("rm -f spread_sort_out.txt $log2");
\r
167 system("rm -f standard_sort_out.txt $log2");
\r
168 system("rm -f input.txt $log2");
\r
170 $ENV{'PATH'} = $prev_path;
\r
172 # A simple speed test comparing std::sort to
\r
174 my ($message, $local_exe) = @_;
\r
175 $exename = $local_exe;
\r
176 print STDOUT "$message\n";
\r
177 $lastTime = SumTimes();
\r
178 print STDOUT "runtime: $lastTime\n";
\r
179 print STDOUT "std::sort time: $baseTime\n";
\r
180 $speedup = (($baseTime/$lastTime) - 1) * 100;
\r
181 print STDOUT "speedup: ".sprintf("%.2f", $speedup)."%\n";
\r
184 # Write an updated constants file as part of tuning.
\r
185 sub WriteConstants {
\r
186 # deleting the file
\r
187 $const_file = 'include/boost/sort/spreadsort/detail/constants.hpp';
\r
188 @cannot = grep {not unlink} $const_file;
\r
189 print "$0: could not unlink @cannot\n" if @cannot;
\r
191 # writing the results back to the original file name
\r
192 unless(open(CONSTANTS, ">$const_file")) {
\r
193 print STDERR "Can't open output file: $const_file: $!\n";
\r
196 print CONSTANTS "//constant definitions for the Boost Sort library\n\n";
\r
197 print CONSTANTS "// Copyright Steven J. Ross 2001 - 2014\n";
\r
198 print CONSTANTS "// Distributed under the Boost Software License, Version 1.0.\n";
\r
199 print CONSTANTS "// (See accompanying file LICENSE_1_0.txt or copy at\n";
\r
200 print CONSTANTS "// http://www.boost.org/LICENSE_1_0.txt)\n\n";
\r
201 print CONSTANTS "// See http://www.boost.org/libs/sort for library home page.\n";
\r
202 print CONSTANTS "#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";
\r
203 print CONSTANTS "#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";
\r
204 print CONSTANTS "namespace boost {\n";
\r
205 print CONSTANTS "namespace sort {\n";
\r
206 print CONSTANTS "namespace spreadsort {\n";
\r
207 print CONSTANTS "namespace detail {\n";
\r
208 print CONSTANTS "//Tuning constants\n";
\r
209 print CONSTANTS "//This should be tuned to your processor cache;\n";
\r
210 print CONSTANTS "//if you go too large you get cache misses on bins\n";
\r
211 print CONSTANTS "//The smaller this number, the less worst-case memory usage.\n";
\r
212 print CONSTANTS "//If too small, too many recursions slow down spreadsort\n";
\r
213 print CONSTANTS "enum { max_splits = $max_splits,\n";
\r
214 print CONSTANTS "//It's better to have a few cache misses and finish sorting\n";
\r
215 print CONSTANTS "//than to run another iteration\n";
\r
216 print CONSTANTS "max_finishing_splits = max_splits + 1,\n";
\r
217 print CONSTANTS "//Sets the minimum number of items per bin.\n";
\r
218 print CONSTANTS "int_log_mean_bin_size = $log_mean_bin_size,\n";
\r
219 print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";
\r
220 print CONSTANTS "//Minimum value 1\n";
\r
221 $log_min_split_count = $log_min_size - $log_mean_bin_size;
\r
222 print CONSTANTS "int_log_min_split_count = $log_min_split_count,\n";
\r
223 print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";
\r
224 print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to integer_sort.\n";
\r
225 print CONSTANTS "int_log_finishing_count = $log_finishing_count,\n";
\r
226 print CONSTANTS "//Sets the minimum number of items per bin for floating point.\n";
\r
227 print CONSTANTS "float_log_mean_bin_size = $float_log_mean_bin_size,\n";
\r
228 print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";
\r
229 print CONSTANTS "//Minimum value 1\n";
\r
230 $float_log_min_split_count = $float_log_min_size - $float_log_mean_bin_size;
\r
231 print CONSTANTS "float_log_min_split_count = $float_log_min_split_count,\n";
\r
232 print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";
\r
233 print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to float_sort.\n";
\r
234 print CONSTANTS "float_log_finishing_count = $float_log_finishing_count,\n";
\r
235 print CONSTANTS "//There is a minimum size below which it is not worth using spreadsort\n";
\r
236 print CONSTANTS "min_sort_size = $min_sort_size };\n";
\r
237 print CONSTANTS "}\n}\n}\n}\n#endif\n";
\r
239 system("$makename $exename $log");
\r
242 # Sort the file with both std::sort and boost::sort, verify the results are the
\r
243 # same, update stdtime with std::sort time, and return boost::sort time.
\r
245 my $sort_time = 0.0;
\r
246 my $time_file = "time.txt";
\r
247 # use the line below on systems that can't overwrite.
\r
248 #system("rm -f $time_file");
\r
249 system("$exename $loopCount $std > $time_file");
\r
250 unless(open(CODE, $time_file)) {
\r
251 print STDERR "Could not open file: $time_file: $!\n";
\r
254 while ($line = <CODE>) {
\r
255 @parts = split("time", $line);
\r
257 $sort_time = $parts[1];
\r
262 # verifying correctness
\r
263 if (not $std and $verifycorrect) {
\r
264 system("$exename $loopCount -std > $time_file");
\r
265 unless(open(CODE, $time_file)) {
\r
266 print STDERR "Could not open file: $time_file: $!\n";
\r
269 die "Difference in results\n" unless (compare("boost_sort_out.txt","standard_sort_out.txt") == 0) ;
\r
270 while ($line = <CODE>) {
\r
271 @parts = split("time", $line);
\r
273 $stdsingle = $parts[1];
\r
282 # Sum up times for different data distributions. If realtimes is not set,
\r
283 # larger ranges are given a larger weight.
\r
289 # if we're only using real times, don't bother with the corner-cases
\r
293 for (; $ii <= 16; $ii++) {
\r
294 system("randomgen $ii $ii $fileSize");
\r
296 $time += CheckTime();
\r
297 $baseTime += $stdsingle;
\r
299 # tests with higher levels of randomness are given
\r
300 # higher priority in timing results
\r
301 print STDOUT "trying $ii $ii\n" if $debug;
\r
302 $time += 2 * $ii * CheckTime();
\r
303 $baseTime += 2 * $ii * $stdsingle;
\r
305 print STDOUT "trying 1 $ii\n" if $debug;
\r
306 system("randomgen 1 $ii $fileSize");
\r
307 $time += $ii * CheckTime();
\r
308 $baseTime += $ii * $stdsingle;
\r
309 print STDOUT "trying $ii 1\n" if $debug;
\r
310 system("randomgen $ii 1 $fileSize");
\r
311 $time += $ii * CheckTime();
\r
312 $baseTime += $ii * $stdsingle;
\r
316 if ($time == 0.0) {
\r
322 # Tests a range of potential values for a variable, and sets it to the fastest.
\r
324 my ($tunevar, $beginval, $endval) = @_;
\r
325 my $best_val = $$tunevar;
\r
327 my $startval = $$tunevar;
\r
328 for ($$tunevar = $beginval; $$tunevar <= $endval; $$tunevar++) {
\r
330 $sumtime = SumTimes();
\r
331 # If this value is better, use it. If this is the start value
\r
332 # and it's just as good, use the startval
\r
333 if (not $besttime or ($sumtime < $besttime) or (($besttime == $sumtime) and ($$tunevar == $startval))) {
\r
334 $besttime = $sumtime;
\r
335 $best_val = $$tunevar;
\r
337 print STDOUT "Value: $$tunevar Time: $sumtime\n" if $verbose;
\r
339 $$tunevar = $best_val;
\r
340 print STDOUT "Best Value: $best_val\n";
\r
341 if ($best_val != $startval) {
\r
346 # Determine the cutoff size below which std::sort is faster.
\r
348 for (; $min_sort_size <= $defFileSize; $min_sort_size *= 2) {
\r
349 $loopCount = ($defFileSize/$min_sort_size)/10;
\r
350 $fileSize = $min_sort_size;
\r
353 $sumtime = SumTimes();
\r
355 $stdtime = SumTimes();
\r
356 print STDOUT "Size: $min_sort_size boost::sort Time: $sumtime std::sort Time: $stdtime\n";
\r
357 last if ($stdtime > $sumtime);
\r