2 Copyright (c) 2005-2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
18 This file contains a few implementations, so it may look overly complicated.
19 The most efficient implementation is also separated into convex_hull_sample.cpp
21 #include "convex_hull.h"
23 typedef util::point<double> point_t;
32 #if !USETBB // Serial implementation of Quick Hull algorithm
34 typedef std::vector< point_t > pointVec_t;
36 void serial_initialize(pointVec_t &points);
38 // C++ style serial code
40 class FindXExtremum : public std::unary_function<const point_t&, void> {
46 FindXExtremum(const point_t& frstPoint, extremumType exType_)
47 : extrXPoint(frstPoint), exType(exType_) {}
49 void operator()(const point_t& p) {
50 if(closerToExtremum(p))
59 const extremumType exType;
62 bool closerToExtremum(const point_t &p) const {
65 return p.x<extrXPoint.x; break;
67 return p.x>extrXPoint.x; break;
69 return false; // avoid warning
73 template <FindXExtremum::extremumType type>
74 point_t extremum(const pointVec_t &points) {
75 assert(!points.empty());
76 return std::for_each(points.begin(), points.end(), FindXExtremum(points[0], type));
79 class SplitByCP : public std::unary_function<const point_t&, void> {
80 pointVec_t &reducedSet;
86 SplitByCP( point_t _p1, point_t _p2, pointVec_t &_reducedSet)
87 : p1(_p1), p2(_p2), reducedSet(_reducedSet), howFar(0), farPoint(p1) {}
89 void operator()(const point_t& p) {
91 if( (p != p1) && (p != p2) ) {
92 cp = util::cross_product(p1, p2, p);
94 reducedSet.push_back(p);
108 point_t divide(const pointVec_t &P, pointVec_t &P_reduced, const point_t &p1, const point_t &p2) {
109 SplitByCP splitByCP(p1, p2, P_reduced);
110 point_t farPoint = std::for_each(P.begin(), P.end(), splitByCP);
113 std::stringstream ss;
114 ss << P.size() << " nodes in bucket"<< ", "
115 << "dividing by: [ " << p1 << ", " << p2 << " ], "
116 << "farthest node: " << farPoint;
117 util::OUTPUT.push_back(ss.str());
123 void divide_and_conquer(const pointVec_t &P, pointVec_t &H, point_t p1, point_t p2) {
124 assert(P.size() >= 2);
125 pointVec_t P_reduced;
127 point_t p_far = divide(P, P_reduced, p1, p2);
128 if (P_reduced.size()<2) {
130 H.insert(H.end(), P_reduced.begin(), P_reduced.end());
133 divide_and_conquer(P_reduced, H1, p1, p_far);
134 divide_and_conquer(P_reduced, H2, p_far, p2);
136 H.insert(H.end(), H1.begin(), H1.end());
137 H.insert(H.end(), H2.begin(), H2.end());
141 void quickhull(const pointVec_t &points, pointVec_t &hull) {
142 if (points.size() < 2) {
143 hull.insert(hull.end(), points.begin(), points.end());
146 point_t p_maxx = extremum<FindXExtremum::maxX>(points);
147 point_t p_minx = extremum<FindXExtremum::minX>(points);
151 divide_and_conquer(points, hull, p_maxx, p_minx);
152 divide_and_conquer(points, H, p_minx, p_maxx);
153 hull.insert(hull.end(), H.begin(), H.end());
157 int main(int argc, char* argv[]) {
158 util::ParseInputArgs(argc, argv);
162 util::my_time_t tm_init, tm_start, tm_end;
164 std::cout << "Starting serial version of QUICK HULL algorithm" << std::endl;
166 tm_init = util::gettime();
167 serial_initialize(points);
168 tm_start = util::gettime();
169 std::cout << "Init time: " << util::time_diff(tm_init, tm_start) << " Points in input: " << points.size() << "\n";
170 tm_start = util::gettime();
171 quickhull(points, hull);
172 tm_end = util::gettime();
173 std::cout << "Serial time: " << util::time_diff(tm_start, tm_end) << " Points in hull: " << hull.size() << "\n";
176 #else // USETBB - parallel version of Quick Hull algorithm
178 #include "tbb/parallel_for.h"
179 #include "tbb/parallel_reduce.h"
180 #include "tbb/blocked_range.h"
182 typedef tbb::blocked_range<size_t> range_t;
185 #include "tbb/concurrent_vector.h"
187 typedef tbb::concurrent_vector<point_t> pointVec_t;
189 void appendVector(const point_t* src, size_t srcSize, pointVec_t& dest) {
190 std::copy(src, src + srcSize, dest.grow_by(srcSize));
193 void appendVector(const pointVec_t& src, pointVec_t& dest) {
194 std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
197 void grow_vector_to_at_least(pointVec_t& vect, size_t size) {
198 vect.grow_to_at_least(size);
200 #else // USE STD::VECTOR - include spin_mutex.h and lock vector operations
201 #include "tbb/spin_mutex.h"
203 typedef tbb::spin_mutex mutex_t;
204 typedef std::vector<point_t> pointVec_t;
206 void appendVector(mutex_t& insertMutex, const pointVec_t& src, pointVec_t& dest) {
207 mutex_t::scoped_lock lock(insertMutex);
208 dest.insert(dest.end(), src.begin(), src.end());
211 void appendVector(mutex_t& insertMutex, const point_t* src, size_t srcSize,
213 mutex_t::scoped_lock lock(insertMutex);
214 dest.insert(dest.end(), src, src + srcSize);
217 void grow_vector_to_at_least(mutex_t& mutex, pointVec_t& vect, size_t size) {
218 mutex_t::scoped_lock lock(mutex);
219 if (vect.size()< size){
225 class FillRNDPointsVector {
228 static const size_t grainSize = cfg::generateGrainSize;
230 static mutex_t pushBackMutex;
233 explicit FillRNDPointsVector(pointVec_t& _points)
236 void operator()(const range_t& range) const {
237 util::rng the_rng(range.begin());
238 const size_t i_end = range.end();
241 points.grow_to_at_least(i_end);
242 #else // Locked enlarge to a not thread-safe STD::VECTOR
243 grow_vector_to_at_least(pushBackMutex,points,i_end);
246 for(size_t i = range.begin(); i != i_end; ++i) {
247 points[i]=util::GenerateRNDPoint<double>(count,the_rng,util::rng::max_rand);
252 class FillRNDPointsVector_buf {
255 static const size_t grainSize = cfg::generateGrainSize;
257 static mutex_t insertMutex;
260 explicit FillRNDPointsVector_buf(pointVec_t& _points)
263 void operator()(const range_t& range) const {
264 util::rng the_rng(range.begin());
265 const size_t i_end = range.end();
266 size_t count = 0, j = 0;
267 point_t tmp_vec[grainSize];
269 for(size_t i=range.begin(); i!=i_end; ++i) {
270 tmp_vec[j++] = util::GenerateRNDPoint<double>(count,the_rng,util::rng::max_rand);
273 grow_vector_to_at_least(points,range.end());
274 #else // USE STD::VECTOR
275 grow_vector_to_at_least(insertMutex,points,range.end());
277 std::copy(tmp_vec, tmp_vec+j,points.begin()+range.begin());
282 mutex_t FillRNDPointsVector::pushBackMutex = mutex_t();
283 mutex_t FillRNDPointsVector_buf::insertMutex = mutex_t();
286 template<typename BodyType>
287 void initialize(pointVec_t &points) {
288 //This function generate the same series of point on every call.
289 //Reproducibility is needed for benchmarking to produce reliable results.
290 //It is achieved through the following points:
291 // - FillRNDPointsVector_buf instance has its own local instance
292 // of random number generator, which in turn does not use any global data
293 // - tbb::simple_partitioner produce the same set of ranges on every call to
295 // - local RNG instances are seeded by the starting indexes of corresponding ranges
296 // - grow_to_at_least() enables putting points into the resulting vector in deterministic order
297 // (unlike concurrent push_back or grow_by).
299 // In the buffered version, a temporary storage for as much as grainSize elements
300 // is allocated inside the body. Since auto_partitioner may increase effective
301 // range size which would cause a crash, simple partitioner has to be used.
303 tbb::parallel_for(range_t(0, cfg::numberOfPoints, BodyType::grainSize),
304 BodyType(points), tbb::simple_partitioner());
307 class FindXExtremum {
313 static const size_t grainSize = cfg::findExtremumGrainSize;
315 FindXExtremum(const pointVec_t& points_, extremumType exType_)
316 : points(points_), exType(exType_), extrXPoint(points[0]) {}
318 FindXExtremum(const FindXExtremum& fxex, tbb::split)
319 : points(fxex.points), exType(fxex.exType), extrXPoint(fxex.extrXPoint) {}
321 void operator()(const range_t& range) {
322 const size_t i_end = range.end();
324 for(size_t i = range.begin(); i != i_end; ++i) {
325 if(closerToExtremum(points[i])) {
326 extrXPoint = points[i];
332 void join(const FindXExtremum &rhs) {
333 if(closerToExtremum(rhs.extrXPoint)) {
334 extrXPoint = rhs.extrXPoint;
338 point_t extremeXPoint() {
343 const pointVec_t &points;
344 const extremumType exType;
346 bool closerToExtremum(const point_t &p) const {
349 return p.x<extrXPoint.x; break;
351 return p.x>extrXPoint.x; break;
353 return false; // avoid warning
357 template <FindXExtremum::extremumType type>
358 point_t extremum(const pointVec_t &P) {
359 FindXExtremum fxBody(P, type);
360 tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
361 return fxBody.extremeXPoint();
365 const pointVec_t &initialSet;
366 pointVec_t &reducedSet;
371 static const size_t grainSize = cfg::divideGrainSize;
373 static mutex_t pushBackMutex;
376 SplitByCP( point_t _p1, point_t _p2,
377 const pointVec_t &_initialSet, pointVec_t &_reducedSet)
379 initialSet(_initialSet), reducedSet(_reducedSet),
380 howFar(0), farPoint(p1) {
383 SplitByCP( SplitByCP& sbcp, tbb::split )
384 : p1(sbcp.p1), p2(sbcp.p2),
385 initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
386 howFar(0), farPoint(p1) {}
388 void operator()( const range_t& range ) {
389 const size_t i_end = range.end();
391 for(size_t i=range.begin(); i!=i_end; ++i) {
392 if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
393 cp = util::cross_product(p1, p2, initialSet[i]);
396 reducedSet.push_back(initialSet[i]);
397 #else // Locked push_back to a not thread-safe STD::VECTOR
399 mutex_t::scoped_lock lock(pushBackMutex);
400 reducedSet.push_back(initialSet[i]);
404 farPoint = initialSet[i];
412 void join(const SplitByCP& rhs) {
413 if(rhs.howFar>howFar) {
415 farPoint = rhs.farPoint;
419 point_t farthestPoint() const {
424 class SplitByCP_buf {
425 const pointVec_t &initialSet;
426 pointVec_t &reducedSet;
431 static const size_t grainSize = cfg::divideGrainSize;
433 static mutex_t insertMutex;
436 SplitByCP_buf( point_t _p1, point_t _p2,
437 const pointVec_t &_initialSet, pointVec_t &_reducedSet)
439 initialSet(_initialSet), reducedSet(_reducedSet),
440 howFar(0), farPoint(p1) {}
442 SplitByCP_buf(SplitByCP_buf& sbcp, tbb::split)
443 : p1(sbcp.p1), p2(sbcp.p2),
444 initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
445 howFar(0), farPoint(p1) {}
447 void operator()(const range_t& range) {
448 const size_t i_end = range.end();
451 point_t tmp_vec[grainSize];
452 for(size_t i = range.begin(); i != i_end; ++i) {
453 if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
454 cp = util::cross_product(p1, p2, initialSet[i]);
456 tmp_vec[j++] = initialSet[i];
458 farPoint = initialSet[i];
466 appendVector(tmp_vec, j, reducedSet);
467 #else // USE STD::VECTOR
468 appendVector(insertMutex, tmp_vec, j, reducedSet);
472 void join(const SplitByCP_buf& rhs) {
473 if(rhs.howFar>howFar) {
475 farPoint = rhs.farPoint;
479 point_t farthestPoint() const {
485 mutex_t SplitByCP::pushBackMutex = mutex_t();
486 mutex_t SplitByCP_buf::insertMutex = mutex_t();
489 template <typename BodyType>
490 point_t divide(const pointVec_t &P, pointVec_t &P_reduced,
491 const point_t &p1, const point_t &p2) {
492 BodyType body(p1, p2, P, P_reduced);
493 // Must use simple_partitioner (see the comment in initialize() above)
494 tbb::parallel_reduce(range_t(0, P.size(), BodyType::grainSize),
495 body, tbb::simple_partitioner() );
498 std::stringstream ss;
499 ss << P.size() << " nodes in bucket"<< ", "
500 << "dividing by: [ " << p1 << ", " << p2 << " ], "
501 << "farthest node: " << body.farthestPoint();
502 util::OUTPUT.push_back(ss.str());
505 return body.farthestPoint();
508 void divide_and_conquer(const pointVec_t &P, pointVec_t &H,
509 point_t p1, point_t p2, bool buffered) {
510 assert(P.size() >= 2);
511 pointVec_t P_reduced;
516 p_far = divide<SplitByCP_buf>(P, P_reduced, p1, p2);
518 p_far = divide<SplitByCP>(P, P_reduced, p1, p2);
521 if (P_reduced.size()<2) {
524 appendVector(P_reduced, H);
525 #else // insert into STD::VECTOR
526 H.insert(H.end(), P_reduced.begin(), P_reduced.end());
530 divide_and_conquer(P_reduced, H1, p1, p_far, buffered);
531 divide_and_conquer(P_reduced, H2, p_far, p2, buffered);
536 #else // insert into STD::VECTOR
537 H.insert(H.end(), H1.begin(), H1.end());
538 H.insert(H.end(), H2.begin(), H2.end());
543 void quickhull(const pointVec_t &points, pointVec_t &hull, bool buffered) {
544 if (points.size() < 2) {
546 appendVector(points, hull);
548 hull.insert(hull.end(), points.begin(), points.end());
553 point_t p_maxx = extremum<FindXExtremum::maxX>(points);
554 point_t p_minx = extremum<FindXExtremum::minX>(points);
558 divide_and_conquer(points, hull, p_maxx, p_minx, buffered);
559 divide_and_conquer(points, H, p_minx, p_maxx, buffered);
561 appendVector(H, hull);
563 hull.insert(hull.end(), H.begin(), H.end());
567 int main(int argc, char* argv[]) {
568 utility::thread_number_range threads(utility::get_default_num_threads);
569 util::ParseInputArgs(argc, argv, threads);
572 util::my_time_t tm_init, tm_start, tm_end;
575 std::cout << "Starting TBB unbuffered push_back version of QUICK HULL algorithm" << std::endl;
577 std::cout << "Starting STL locked unbuffered push_back version of QUICK HULL algorithm" << std::endl;
580 for(nthreads=threads.first; nthreads<=threads.last; nthreads=threads.step(nthreads)) {
584 tbb::global_control c(tbb::global_control::max_allowed_parallelism, nthreads);
585 tm_init = util::gettime();
586 initialize<FillRNDPointsVector>(points);
587 tm_start = util::gettime();
588 std::cout << "Parallel init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start) << " Points in input: " << points.size() << "\n";
590 tm_start = util::gettime();
591 quickhull(points, hull, false);
592 tm_end = util::gettime();
593 std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end) << " Points in hull: " << hull.size() << "\n";
597 std::cout << "Starting TBB buffered version of QUICK HULL algorithm" << std::endl;
599 std::cout << "Starting STL locked buffered version of QUICK HULL algorithm" << std::endl;
602 for(nthreads=threads.first; nthreads<=threads.last; nthreads=threads.step(nthreads)) {
606 tbb::global_control c(tbb::global_control::max_allowed_parallelism, nthreads);
608 tm_init = util::gettime();
609 initialize<FillRNDPointsVector_buf>(points);
610 tm_start = util::gettime();
611 std::cout << "Init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start) << " Points in input: " << points.size() << "\n";
613 tm_start = util::gettime();
614 quickhull(points, hull, true);
615 tm_end = util::gettime();
616 std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end) << " Points in hull: " << hull.size() << "\n";
624 void serial_initialize(pointVec_t &points) {
625 points.reserve(cfg::numberOfPoints);
627 unsigned int rseed=1;
628 for(size_t i=0, count=0; long(i)<cfg::numberOfPoints; ++i) {
629 points.push_back(util::GenerateRNDPoint<double>(count,&std::rand,RAND_MAX ));