examples/parallel_reduce/convex_hull/convex_hull_bench.cpp

   1 /*
   2     Copyright (c) 2005-2019 Intel Corporation
   3
   4     Licensed under the Apache License, Version 2.0 (the "License");
   5     you may not use this file except in compliance with the License.
   6     You may obtain a copy of the License at
   7
   8         http://www.apache.org/licenses/LICENSE-2.0
   9
  10     Unless required by applicable law or agreed to in writing, software
  11     distributed under the License is distributed on an "AS IS" BASIS,
  12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13     See the License for the specific language governing permissions and
  14     limitations under the License.
  15 */
  16
  17 /*
  18     This file contains a few implementations, so it may look overly complicated.
  19     The most efficient implementation is also separated into convex_hull_sample.cpp
  20 */
  21 #include "convex_hull.h"
  22
  23 typedef util::point<double> point_t;
  24
  25 #ifndef USETBB
  26     #define USETBB      1
  27 #endif
  28 #ifndef USECONCVEC
  29     #define USECONCVEC   1
  30 #endif
  31
  32 #if !USETBB // Serial implementation of Quick Hull algorithm
  33
  34 typedef std::vector< point_t > pointVec_t;
  35
  36 void serial_initialize(pointVec_t &points);
  37
  38 // C++ style serial code
  39
  40 class FindXExtremum : public std::unary_function<const point_t&, void> {
  41 public:
  42     typedef enum {
  43         minX, maxX
  44     } extremumType;
  45
  46     FindXExtremum(const point_t& frstPoint, extremumType exType_)
  47         : extrXPoint(frstPoint), exType(exType_) {}
  48
  49     void operator()(const point_t& p) {
  50         if(closerToExtremum(p))
  51             extrXPoint = p;
  52     }
  53
  54     operator point_t () {
  55         return extrXPoint;
  56     }
  57
  58 private:
  59     const extremumType   exType;
  60     point_t              extrXPoint;
  61
  62     bool closerToExtremum(const point_t &p) const {
  63         switch(exType) {
  64         case minX:
  65             return p.x<extrXPoint.x; break;
  66         case maxX:
  67             return p.x>extrXPoint.x; break;
  68         }
  69         return false; // avoid warning
  70     }
  71 };
  72
  73 template <FindXExtremum::extremumType type>
  74 point_t extremum(const pointVec_t &points) {
  75     assert(!points.empty());
  76     return std::for_each(points.begin(), points.end(), FindXExtremum(points[0], type));
  77 }
  78
  79 class SplitByCP : public std::unary_function<const point_t&, void> {
  80     pointVec_t          &reducedSet;
  81     point_t              p1, p2;
  82     point_t              farPoint;
  83     double               howFar;
  84 public:
  85
  86     SplitByCP( point_t _p1, point_t _p2, pointVec_t &_reducedSet)
  87         : p1(_p1), p2(_p2), reducedSet(_reducedSet), howFar(0), farPoint(p1) {}
  88
  89     void operator()(const point_t& p) {
  90         double cp;
  91         if( (p != p1) && (p != p2) ) {
  92             cp = util::cross_product(p1, p2, p);
  93             if(cp>0) {
  94                 reducedSet.push_back(p);
  95                 if(cp>howFar) {
  96                     farPoint = p;
  97                     howFar   = cp;
  98                 }
  99             }
 100         }
 101     }
 102
 103     operator point_t (){
 104         return farPoint;
 105     }
 106 };
 107
 108 point_t divide(const pointVec_t &P, pointVec_t &P_reduced, const point_t &p1, const point_t &p2) {
 109     SplitByCP splitByCP(p1, p2, P_reduced);
 110     point_t farPoint = std::for_each(P.begin(), P.end(), splitByCP);
 111
 112     if(util::verbose) {
 113         std::stringstream ss;
 114         ss << P.size() << " nodes in bucket"<< ", "
 115             << "dividing by: [ " << p1 << ", " << p2 << " ], "
 116             << "farthest node: " << farPoint;
 117         util::OUTPUT.push_back(ss.str());
 118     }
 119
 120     return farPoint;
 121 }
 122
 123 void divide_and_conquer(const pointVec_t &P, pointVec_t &H, point_t p1, point_t p2) {
 124     assert(P.size() >= 2);
 125     pointVec_t P_reduced;
 126     pointVec_t H1, H2;
 127     point_t p_far = divide(P, P_reduced, p1, p2);
 128     if (P_reduced.size()<2) {
 129         H.push_back(p1);
 130         H.insert(H.end(), P_reduced.begin(), P_reduced.end());
 131     }
 132     else {
 133         divide_and_conquer(P_reduced, H1, p1, p_far);
 134         divide_and_conquer(P_reduced, H2, p_far, p2);
 135
 136         H.insert(H.end(), H1.begin(), H1.end());
 137         H.insert(H.end(), H2.begin(), H2.end());
 138     }
 139 }
 140
 141 void quickhull(const pointVec_t &points, pointVec_t &hull) {
 142     if (points.size() < 2) {
 143         hull.insert(hull.end(), points.begin(), points.end());
 144         return;
 145     }
 146     point_t p_maxx = extremum<FindXExtremum::maxX>(points);
 147     point_t p_minx = extremum<FindXExtremum::minX>(points);
 148
 149     pointVec_t H;
 150
 151     divide_and_conquer(points, hull, p_maxx, p_minx);
 152     divide_and_conquer(points, H, p_minx, p_maxx);
 153     hull.insert(hull.end(), H.begin(), H.end());
 154 }
 155
 156
 157 int main(int argc, char* argv[]) {
 158     util::ParseInputArgs(argc, argv);
 159
 160     pointVec_t      points;
 161     pointVec_t      hull;
 162     util::my_time_t tm_init, tm_start, tm_end;
 163
 164     std::cout << "Starting serial version of QUICK HULL algorithm" << std::endl;
 165
 166     tm_init = util::gettime();
 167     serial_initialize(points);
 168     tm_start = util::gettime();
 169     std::cout << "Init time: " << util::time_diff(tm_init, tm_start) << "  Points in input: " << points.size() << "\n";
 170     tm_start = util::gettime();
 171     quickhull(points, hull);
 172     tm_end = util::gettime();
 173     std::cout << "Serial time: " << util::time_diff(tm_start, tm_end) << "  Points in hull: " << hull.size() << "\n";
 174 }
 175
 176 #else // USETBB - parallel version of Quick Hull algorithm
 177
 178 #include "tbb/parallel_for.h"
 179 #include "tbb/parallel_reduce.h"
 180 #include "tbb/blocked_range.h"
 181
 182 typedef tbb::blocked_range<size_t> range_t;
 183
 184 #if USECONCVEC
 185 #include "tbb/concurrent_vector.h"
 186
 187 typedef tbb::concurrent_vector<point_t> pointVec_t;
 188
 189 void appendVector(const point_t* src, size_t srcSize, pointVec_t& dest) {
 190     std::copy(src, src + srcSize, dest.grow_by(srcSize));
 191 }
 192
 193 void appendVector(const pointVec_t& src, pointVec_t& dest) {
 194     std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
 195 }
 196
 197 void grow_vector_to_at_least(pointVec_t& vect, size_t size) {
 198     vect.grow_to_at_least(size);
 199 }
 200 #else // USE STD::VECTOR - include spin_mutex.h and lock vector operations
 201 #include "tbb/spin_mutex.h"
 202
 203 typedef tbb::spin_mutex      mutex_t;
 204 typedef std::vector<point_t> pointVec_t;
 205
 206 void appendVector(mutex_t& insertMutex, const pointVec_t& src, pointVec_t& dest) {
 207     mutex_t::scoped_lock lock(insertMutex);
 208     dest.insert(dest.end(), src.begin(), src.end());
 209 }
 210
 211 void appendVector(mutex_t& insertMutex, const point_t* src, size_t srcSize,
 212                   pointVec_t& dest) {
 213     mutex_t::scoped_lock lock(insertMutex);
 214     dest.insert(dest.end(), src, src + srcSize);
 215 }
 216
 217 void grow_vector_to_at_least(mutex_t& mutex, pointVec_t& vect, size_t size) {
 218     mutex_t::scoped_lock lock(mutex);
 219     if (vect.size()< size){
 220         vect.resize(size);
 221     }
 222 }
 223 #endif // USECONCVEC
 224
 225 class FillRNDPointsVector {
 226     pointVec_t          &points;
 227 public:
 228     static const size_t  grainSize = cfg::generateGrainSize;
 229 #if !USECONCVEC
 230     static mutex_t       pushBackMutex;
 231 #endif // USECONCVEC
 232
 233     explicit FillRNDPointsVector(pointVec_t& _points)
 234         : points(_points){}
 235
 236     void operator()(const range_t& range) const {
 237         util::rng the_rng(range.begin());
 238         const size_t i_end = range.end();
 239         size_t count = 0;
 240 #if USECONCVEC
 241             points.grow_to_at_least(i_end);
 242 #else // Locked enlarge to a not thread-safe STD::VECTOR
 243             grow_vector_to_at_least(pushBackMutex,points,i_end);
 244 #endif // USECONCVEC
 245
 246         for(size_t i = range.begin(); i != i_end; ++i) {
 247             points[i]=util::GenerateRNDPoint<double>(count,the_rng,util::rng::max_rand);
 248         }
 249     }
 250 };
 251
 252 class FillRNDPointsVector_buf {
 253     pointVec_t          &points;
 254 public:
 255     static const size_t  grainSize = cfg::generateGrainSize;
 256 #if !USECONCVEC
 257     static mutex_t       insertMutex;
 258 #endif // USECONCVEC
 259
 260     explicit FillRNDPointsVector_buf(pointVec_t& _points)
 261         : points(_points){}
 262
 263     void operator()(const range_t& range) const {
 264         util::rng the_rng(range.begin());
 265         const size_t i_end = range.end();
 266         size_t count = 0, j = 0;
 267         point_t tmp_vec[grainSize];
 268
 269         for(size_t i=range.begin(); i!=i_end; ++i) {
 270             tmp_vec[j++] = util::GenerateRNDPoint<double>(count,the_rng,util::rng::max_rand);
 271         }
 272 #if USECONCVEC
 273         grow_vector_to_at_least(points,range.end());
 274 #else // USE STD::VECTOR
 275         grow_vector_to_at_least(insertMutex,points,range.end());
 276 #endif // USECONCVEC
 277         std::copy(tmp_vec, tmp_vec+j,points.begin()+range.begin());
 278     }
 279 };
 280
 281 #if !USECONCVEC
 282 mutex_t FillRNDPointsVector::pushBackMutex   = mutex_t();
 283 mutex_t FillRNDPointsVector_buf::insertMutex = mutex_t();
 284 #endif
 285
 286 template<typename BodyType>
 287 void initialize(pointVec_t &points) {
 288     //This function generate the same series of point on every call.
 289     //Reproducibility is needed for benchmarking to produce reliable results.
 290     //It is achieved through the following points:
 291     //      - FillRNDPointsVector_buf instance has its own local instance
 292     //        of random number generator, which in turn does not use any global data
 293     //      - tbb::simple_partitioner produce the same set of ranges on every call to
 294     //        tbb::parallel_for
 295     //      - local RNG instances are seeded by the starting indexes of corresponding ranges
 296     //      - grow_to_at_least() enables putting points into the resulting vector in deterministic order
 297     //        (unlike concurrent push_back or grow_by).
 298
 299     // In the buffered version, a temporary storage for as much as grainSize elements
 300     // is allocated inside the body. Since auto_partitioner may increase effective
 301     // range size which would cause a crash, simple partitioner has to be used.
 302
 303     tbb::parallel_for(range_t(0, cfg::numberOfPoints, BodyType::grainSize),
 304     BodyType(points), tbb::simple_partitioner());
 305 }
 306
 307 class FindXExtremum {
 308 public:
 309     typedef enum {
 310         minX, maxX
 311     } extremumType;
 312
 313     static const size_t  grainSize = cfg::findExtremumGrainSize;
 314
 315     FindXExtremum(const pointVec_t& points_, extremumType exType_)
 316         : points(points_), exType(exType_), extrXPoint(points[0]) {}
 317
 318     FindXExtremum(const FindXExtremum& fxex, tbb::split)
 319         : points(fxex.points), exType(fxex.exType), extrXPoint(fxex.extrXPoint) {}
 320
 321     void operator()(const range_t& range) {
 322         const size_t i_end = range.end();
 323         if(!range.empty()) {
 324             for(size_t i = range.begin(); i != i_end; ++i) {
 325                 if(closerToExtremum(points[i])) {
 326                     extrXPoint = points[i];
 327                 }
 328             }
 329         }
 330     }
 331
 332     void join(const FindXExtremum &rhs) {
 333         if(closerToExtremum(rhs.extrXPoint)) {
 334             extrXPoint = rhs.extrXPoint;
 335         }
 336     }
 337
 338     point_t extremeXPoint() {
 339         return extrXPoint;
 340     }
 341
 342 private:
 343     const pointVec_t    &points;
 344     const extremumType   exType;
 345     point_t              extrXPoint;
 346     bool closerToExtremum(const point_t &p) const {
 347         switch(exType) {
 348         case minX:
 349             return p.x<extrXPoint.x; break;
 350         case maxX:
 351             return p.x>extrXPoint.x; break;
 352         }
 353         return false; // avoid warning
 354     }
 355 };
 356
 357 template <FindXExtremum::extremumType type>
 358 point_t extremum(const pointVec_t &P) {
 359     FindXExtremum fxBody(P, type);
 360     tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
 361     return fxBody.extremeXPoint();
 362 }
 363
 364 class SplitByCP {
 365     const pointVec_t    &initialSet;
 366     pointVec_t          &reducedSet;
 367     point_t              p1, p2;
 368     point_t              farPoint;
 369     double               howFar;
 370 public:
 371     static const size_t grainSize = cfg::divideGrainSize;
 372 #if !USECONCVEC
 373     static mutex_t      pushBackMutex;
 374 #endif // USECONCVEC
 375
 376     SplitByCP( point_t _p1, point_t _p2,
 377         const pointVec_t &_initialSet, pointVec_t &_reducedSet)
 378         : p1(_p1), p2(_p2),
 379         initialSet(_initialSet), reducedSet(_reducedSet),
 380         howFar(0), farPoint(p1) {
 381     }
 382
 383     SplitByCP( SplitByCP& sbcp, tbb::split )
 384         : p1(sbcp.p1), p2(sbcp.p2),
 385         initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
 386         howFar(0), farPoint(p1) {}
 387
 388     void operator()( const range_t& range ) {
 389         const size_t i_end = range.end();
 390         double cp;
 391         for(size_t i=range.begin(); i!=i_end; ++i) {
 392             if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
 393                 cp = util::cross_product(p1, p2, initialSet[i]);
 394                 if(cp>0) {
 395 #if USECONCVEC
 396                     reducedSet.push_back(initialSet[i]);
 397 #else // Locked push_back to a not thread-safe STD::VECTOR
 398                     {
 399                         mutex_t::scoped_lock lock(pushBackMutex);
 400                         reducedSet.push_back(initialSet[i]);
 401                     }
 402 #endif // USECONCVEC
 403                     if(cp>howFar) {
 404                         farPoint = initialSet[i];
 405                         howFar   = cp;
 406                     }
 407                 }
 408             }
 409         }
 410     }
 411
 412     void join(const SplitByCP& rhs) {
 413         if(rhs.howFar>howFar) {
 414             howFar   = rhs.howFar;
 415             farPoint = rhs.farPoint;
 416         }
 417     }
 418
 419     point_t farthestPoint() const {
 420         return farPoint;
 421     }
 422 };
 423
 424 class SplitByCP_buf {
 425     const pointVec_t    &initialSet;
 426     pointVec_t          &reducedSet;
 427     point_t              p1, p2;
 428     point_t              farPoint;
 429     double               howFar;
 430 public:
 431     static const size_t  grainSize = cfg::divideGrainSize;
 432 #if !USECONCVEC
 433     static mutex_t       insertMutex;
 434 #endif // USECONCVEC
 435
 436     SplitByCP_buf( point_t _p1, point_t _p2,
 437         const pointVec_t &_initialSet, pointVec_t &_reducedSet)
 438         : p1(_p1), p2(_p2),
 439         initialSet(_initialSet), reducedSet(_reducedSet),
 440         howFar(0), farPoint(p1) {}
 441
 442     SplitByCP_buf(SplitByCP_buf& sbcp, tbb::split)
 443         : p1(sbcp.p1), p2(sbcp.p2),
 444         initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
 445         howFar(0), farPoint(p1) {}
 446
 447     void operator()(const range_t& range) {
 448         const size_t i_end = range.end();
 449         size_t j = 0;
 450         double cp;
 451         point_t tmp_vec[grainSize];
 452         for(size_t i = range.begin(); i != i_end; ++i) {
 453             if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
 454                 cp = util::cross_product(p1, p2, initialSet[i]);
 455                 if(cp>0) {
 456                     tmp_vec[j++] = initialSet[i];
 457                     if(cp>howFar) {
 458                         farPoint = initialSet[i];
 459                         howFar   = cp;
 460                     }
 461                 }
 462             }
 463         }
 464
 465 #if USECONCVEC
 466         appendVector(tmp_vec, j, reducedSet);
 467 #else // USE STD::VECTOR
 468         appendVector(insertMutex, tmp_vec, j, reducedSet);
 469 #endif // USECONCVEC
 470     }
 471
 472     void join(const SplitByCP_buf& rhs) {
 473         if(rhs.howFar>howFar) {
 474             howFar   = rhs.howFar;
 475             farPoint = rhs.farPoint;
 476         }
 477     }
 478
 479     point_t farthestPoint() const {
 480         return farPoint;
 481     }
 482 };
 483
 484 #if !USECONCVEC
 485 mutex_t SplitByCP::pushBackMutex   = mutex_t();
 486 mutex_t SplitByCP_buf::insertMutex = mutex_t();
 487 #endif
 488
 489 template <typename BodyType>
 490 point_t divide(const pointVec_t &P, pointVec_t &P_reduced,
 491               const point_t &p1, const point_t &p2) {
 492     BodyType body(p1, p2, P, P_reduced);
 493     // Must use simple_partitioner (see the comment in initialize() above)
 494     tbb::parallel_reduce(range_t(0, P.size(), BodyType::grainSize),
 495                          body, tbb::simple_partitioner() );
 496
 497     if(util::verbose) {
 498         std::stringstream ss;
 499         ss << P.size() << " nodes in bucket"<< ", "
 500             << "dividing by: [ " << p1 << ", " << p2 << " ], "
 501             << "farthest node: " << body.farthestPoint();
 502         util::OUTPUT.push_back(ss.str());
 503     }
 504
 505     return body.farthestPoint();
 506 }
 507
 508 void divide_and_conquer(const pointVec_t &P, pointVec_t &H,
 509                         point_t p1, point_t p2, bool buffered) {
 510     assert(P.size() >= 2);
 511     pointVec_t P_reduced;
 512     pointVec_t H1, H2;
 513     point_t p_far;
 514
 515     if(buffered) {
 516         p_far = divide<SplitByCP_buf>(P, P_reduced, p1, p2);
 517     } else {
 518         p_far = divide<SplitByCP>(P, P_reduced, p1, p2);
 519     }
 520
 521     if (P_reduced.size()<2) {
 522         H.push_back(p1);
 523 #if USECONCVEC
 524         appendVector(P_reduced, H);
 525 #else // insert into STD::VECTOR
 526         H.insert(H.end(), P_reduced.begin(), P_reduced.end());
 527 #endif
 528     }
 529     else {
 530         divide_and_conquer(P_reduced, H1, p1, p_far, buffered);
 531         divide_and_conquer(P_reduced, H2, p_far, p2, buffered);
 532
 533 #if USECONCVEC
 534         appendVector(H1, H);
 535         appendVector(H2, H);
 536 #else // insert into STD::VECTOR
 537         H.insert(H.end(), H1.begin(), H1.end());
 538         H.insert(H.end(), H2.begin(), H2.end());
 539 #endif
 540     }
 541 }
 542
 543 void quickhull(const pointVec_t &points, pointVec_t &hull, bool buffered) {
 544     if (points.size() < 2) {
 545 #if USECONCVEC
 546         appendVector(points, hull);
 547 #else // STD::VECTOR
 548         hull.insert(hull.end(), points.begin(), points.end());
 549 #endif // USECONCVEC
 550         return;
 551     }
 552
 553     point_t p_maxx = extremum<FindXExtremum::maxX>(points);
 554     point_t p_minx = extremum<FindXExtremum::minX>(points);
 555
 556     pointVec_t H;
 557
 558     divide_and_conquer(points, hull, p_maxx, p_minx, buffered);
 559     divide_and_conquer(points, H, p_minx, p_maxx, buffered);
 560 #if USECONCVEC
 561     appendVector(H, hull);
 562 #else // STD::VECTOR
 563     hull.insert(hull.end(), H.begin(), H.end());
 564 #endif // USECONCVEC
 565 }
 566
 567 int main(int argc, char* argv[]) {
 568     utility::thread_number_range threads(utility::get_default_num_threads);
 569     util::ParseInputArgs(argc, argv, threads);
 570
 571     int             nthreads;
 572     util::my_time_t tm_init, tm_start, tm_end;
 573
 574 #if USECONCVEC
 575     std::cout << "Starting TBB unbuffered push_back version of QUICK HULL algorithm" << std::endl;
 576 #else
 577     std::cout << "Starting STL locked unbuffered push_back version of QUICK HULL algorithm" << std::endl;
 578 #endif // USECONCVEC
 579
 580     for(nthreads=threads.first; nthreads<=threads.last; nthreads=threads.step(nthreads)) {
 581         pointVec_t      points;
 582         pointVec_t      hull;
 583
 584         tbb::global_control c(tbb::global_control::max_allowed_parallelism, nthreads);
 585         tm_init = util::gettime();
 586         initialize<FillRNDPointsVector>(points);
 587         tm_start = util::gettime();
 588         std::cout << "Parallel init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start) << "  Points in input: " << points.size() << "\n";
 589
 590         tm_start = util::gettime();
 591         quickhull(points, hull, false);
 592         tm_end = util::gettime();
 593         std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end) << "  Points in hull: " << hull.size() << "\n";
 594     }
 595
 596 #if USECONCVEC
 597     std::cout << "Starting TBB buffered version of QUICK HULL algorithm" << std::endl;
 598 #else
 599     std::cout << "Starting STL locked buffered version of QUICK HULL algorithm" << std::endl;
 600 #endif
 601
 602     for(nthreads=threads.first; nthreads<=threads.last; nthreads=threads.step(nthreads)) {
 603         pointVec_t      points;
 604         pointVec_t      hull;
 605
 606         tbb::global_control c(tbb::global_control::max_allowed_parallelism, nthreads);
 607
 608         tm_init = util::gettime();
 609         initialize<FillRNDPointsVector_buf>(points);
 610         tm_start = util::gettime();
 611         std::cout << "Init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start) << "  Points in input: " << points.size() << "\n";
 612
 613         tm_start = util::gettime();
 614         quickhull(points, hull, true);
 615         tm_end = util::gettime();
 616         std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end) << "  Points in hull: " << hull.size() << "\n";
 617     }
 618
 619     return 0;
 620 }
 621
 622 #endif // USETBB
 623
 624 void serial_initialize(pointVec_t &points) {
 625     points.reserve(cfg::numberOfPoints);
 626
 627     unsigned int rseed=1;
 628     for(size_t i=0, count=0; long(i)<cfg::numberOfPoints; ++i) {
 629         points.push_back(util::GenerateRNDPoint<double>(count,&std::rand,RAND_MAX ));
 630     }
 631 }