[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / BulletDynamics / ConstraintSolver / btBatchedConstraints.cpp
1 /*
2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15
16 #include "btBatchedConstraints.h"
17
18 #include "LinearMath/btIDebugDraw.h"
19 #include "LinearMath/btMinMax.h"
20 #include "LinearMath/btStackAlloc.h"
21 #include "LinearMath/btQuickprof.h"
22
23 #include <string.h>  //for memset
24
25 #include <cmath>
26
27 const int kNoMerge = -1;
28
29 bool btBatchedConstraints::s_debugDrawBatches = false;
30
31 struct btBatchedConstraintInfo
32 {
33         int constraintIndex;
34         int numConstraintRows;
35         int bodyIds[2];
36 };
37
38 struct btBatchInfo
39 {
40         int numConstraints;
41         int mergeIndex;
42
43         btBatchInfo() : numConstraints(0), mergeIndex(kNoMerge) {}
44 };
45
46 bool btBatchedConstraints::validate(btConstraintArray* constraints, const btAlignedObjectArray<btSolverBody>& bodies) const
47 {
48         //
49         // validate: for debugging only. Verify coloring of bodies, that no body is touched by more than one batch in any given phase
50         //
51         int errors = 0;
52         const int kUnassignedBatch = -1;
53
54         btAlignedObjectArray<int> bodyBatchId;
55         for (int iPhase = 0; iPhase < m_phases.size(); ++iPhase)
56         {
57                 bodyBatchId.resizeNoInitialize(0);
58                 bodyBatchId.resize(bodies.size(), kUnassignedBatch);
59                 const Range& phase = m_phases[iPhase];
60                 for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch)
61                 {
62                         const Range& batch = m_batches[iBatch];
63                         for (int iiCons = batch.begin; iiCons < batch.end; ++iiCons)
64                         {
65                                 int iCons = m_constraintIndices[iiCons];
66                                 const btSolverConstraint& cons = constraints->at(iCons);
67                                 const btSolverBody& bodyA = bodies[cons.m_solverBodyIdA];
68                                 const btSolverBody& bodyB = bodies[cons.m_solverBodyIdB];
69                                 if (!bodyA.internalGetInvMass().isZero())
70                                 {
71                                         int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdA];
72                                         if (thisBodyBatchId == kUnassignedBatch)
73                                         {
74                                                 bodyBatchId[cons.m_solverBodyIdA] = iBatch;
75                                         }
76                                         else if (thisBodyBatchId != iBatch)
77                                         {
78                                                 btAssert(!"dynamic body is used in 2 different batches in the same phase");
79                                                 errors++;
80                                         }
81                                 }
82                                 if (!bodyB.internalGetInvMass().isZero())
83                                 {
84                                         int thisBodyBatchId = bodyBatchId[cons.m_solverBodyIdB];
85                                         if (thisBodyBatchId == kUnassignedBatch)
86                                         {
87                                                 bodyBatchId[cons.m_solverBodyIdB] = iBatch;
88                                         }
89                                         else if (thisBodyBatchId != iBatch)
90                                         {
91                                                 btAssert(!"dynamic body is used in 2 different batches in the same phase");
92                                                 errors++;
93                                         }
94                                 }
95                         }
96                 }
97         }
98         return errors == 0;
99 }
100
101 static void debugDrawSingleBatch(const btBatchedConstraints* bc,
102                                                                  btConstraintArray* constraints,
103                                                                  const btAlignedObjectArray<btSolverBody>& bodies,
104                                                                  int iBatch,
105                                                                  const btVector3& color,
106                                                                  const btVector3& offset)
107 {
108         if (bc && bc->m_debugDrawer && iBatch < bc->m_batches.size())
109         {
110                 const btBatchedConstraints::Range& b = bc->m_batches[iBatch];
111                 for (int iiCon = b.begin; iiCon < b.end; ++iiCon)
112                 {
113                         int iCon = bc->m_constraintIndices[iiCon];
114                         const btSolverConstraint& con = constraints->at(iCon);
115                         int iBody0 = con.m_solverBodyIdA;
116                         int iBody1 = con.m_solverBodyIdB;
117                         btVector3 pos0 = bodies[iBody0].getWorldTransform().getOrigin() + offset;
118                         btVector3 pos1 = bodies[iBody1].getWorldTransform().getOrigin() + offset;
119                         bc->m_debugDrawer->drawLine(pos0, pos1, color);
120                 }
121         }
122 }
123
124 static void debugDrawPhase(const btBatchedConstraints* bc,
125                                                    btConstraintArray* constraints,
126                                                    const btAlignedObjectArray<btSolverBody>& bodies,
127                                                    int iPhase,
128                                                    const btVector3& color0,
129                                                    const btVector3& color1,
130                                                    const btVector3& offset)
131 {
132         BT_PROFILE("debugDrawPhase");
133         if (bc && bc->m_debugDrawer && iPhase < bc->m_phases.size())
134         {
135                 const btBatchedConstraints::Range& phase = bc->m_phases[iPhase];
136                 for (int iBatch = phase.begin; iBatch < phase.end; ++iBatch)
137                 {
138                         float tt = float(iBatch - phase.begin) / float(btMax(1, phase.end - phase.begin - 1));
139                         btVector3 col = lerp(color0, color1, tt);
140                         debugDrawSingleBatch(bc, constraints, bodies, iBatch, col, offset);
141                 }
142         }
143 }
144
145 static void debugDrawAllBatches(const btBatchedConstraints* bc,
146                                                                 btConstraintArray* constraints,
147                                                                 const btAlignedObjectArray<btSolverBody>& bodies)
148 {
149         BT_PROFILE("debugDrawAllBatches");
150         if (bc && bc->m_debugDrawer && bc->m_phases.size() > 0)
151         {
152                 btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT);
153                 btVector3 bboxMax = -bboxMin;
154                 for (int iBody = 0; iBody < bodies.size(); ++iBody)
155                 {
156                         const btVector3& pos = bodies[iBody].getWorldTransform().getOrigin();
157                         bboxMin.setMin(pos);
158                         bboxMax.setMax(pos);
159                 }
160                 btVector3 bboxExtent = bboxMax - bboxMin;
161                 btVector3 offsetBase = btVector3(0, bboxExtent.y() * 1.1f, 0);
162                 btVector3 offsetStep = btVector3(0, 0, bboxExtent.z() * 1.1f);
163                 int numPhases = bc->m_phases.size();
164                 for (int iPhase = 0; iPhase < numPhases; ++iPhase)
165                 {
166                         float b = float(iPhase) / float(numPhases - 1);
167                         btVector3 color0 = btVector3(1, 0, b);
168                         btVector3 color1 = btVector3(0, 1, b);
169                         btVector3 offset = offsetBase + offsetStep * (float(iPhase) - float(numPhases - 1) * 0.5);
170                         debugDrawPhase(bc, constraints, bodies, iPhase, color0, color1, offset);
171                 }
172         }
173 }
174
175 static void initBatchedBodyDynamicFlags(btAlignedObjectArray<bool>* outBodyDynamicFlags, const btAlignedObjectArray<btSolverBody>& bodies)
176 {
177         BT_PROFILE("initBatchedBodyDynamicFlags");
178         btAlignedObjectArray<bool>& bodyDynamicFlags = *outBodyDynamicFlags;
179         bodyDynamicFlags.resizeNoInitialize(bodies.size());
180         for (int i = 0; i < bodies.size(); ++i)
181         {
182                 const btSolverBody& body = bodies[i];
183                 bodyDynamicFlags[i] = (body.internalGetInvMass().x() > btScalar(0));
184         }
185 }
186
187 static int runLengthEncodeConstraintInfo(btBatchedConstraintInfo* outConInfos, int numConstraints)
188 {
189         BT_PROFILE("runLengthEncodeConstraintInfo");
190         // detect and run-length encode constraint rows that repeat the same bodies
191         int iDest = 0;
192         int iSrc = 0;
193         while (iSrc < numConstraints)
194         {
195                 const btBatchedConstraintInfo& srcConInfo = outConInfos[iSrc];
196                 btBatchedConstraintInfo& conInfo = outConInfos[iDest];
197                 conInfo.constraintIndex = iSrc;
198                 conInfo.bodyIds[0] = srcConInfo.bodyIds[0];
199                 conInfo.bodyIds[1] = srcConInfo.bodyIds[1];
200                 while (iSrc < numConstraints && outConInfos[iSrc].bodyIds[0] == srcConInfo.bodyIds[0] && outConInfos[iSrc].bodyIds[1] == srcConInfo.bodyIds[1])
201                 {
202                         ++iSrc;
203                 }
204                 conInfo.numConstraintRows = iSrc - conInfo.constraintIndex;
205                 ++iDest;
206         }
207         return iDest;
208 }
209
210 struct ReadSolverConstraintsLoop : public btIParallelForBody
211 {
212         btBatchedConstraintInfo* m_outConInfos;
213         btConstraintArray* m_constraints;
214
215         ReadSolverConstraintsLoop(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints)
216         {
217                 m_outConInfos = outConInfos;
218                 m_constraints = constraints;
219         }
220         void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
221         {
222                 for (int i = iBegin; i < iEnd; ++i)
223                 {
224                         btBatchedConstraintInfo& conInfo = m_outConInfos[i];
225                         const btSolverConstraint& con = m_constraints->at(i);
226                         conInfo.bodyIds[0] = con.m_solverBodyIdA;
227                         conInfo.bodyIds[1] = con.m_solverBodyIdB;
228                         conInfo.constraintIndex = i;
229                         conInfo.numConstraintRows = 1;
230                 }
231         }
232 };
233
234 static int initBatchedConstraintInfo(btBatchedConstraintInfo* outConInfos, btConstraintArray* constraints)
235 {
236         BT_PROFILE("initBatchedConstraintInfo");
237         int numConstraints = constraints->size();
238         bool inParallel = true;
239         if (inParallel)
240         {
241                 ReadSolverConstraintsLoop loop(outConInfos, constraints);
242                 int grainSize = 1200;
243                 btParallelFor(0, numConstraints, grainSize, loop);
244         }
245         else
246         {
247                 for (int i = 0; i < numConstraints; ++i)
248                 {
249                         btBatchedConstraintInfo& conInfo = outConInfos[i];
250                         const btSolverConstraint& con = constraints->at(i);
251                         conInfo.bodyIds[0] = con.m_solverBodyIdA;
252                         conInfo.bodyIds[1] = con.m_solverBodyIdB;
253                         conInfo.constraintIndex = i;
254                         conInfo.numConstraintRows = 1;
255                 }
256         }
257         bool useRunLengthEncoding = true;
258         if (useRunLengthEncoding)
259         {
260                 numConstraints = runLengthEncodeConstraintInfo(outConInfos, numConstraints);
261         }
262         return numConstraints;
263 }
264
265 static void expandConstraintRowsInPlace(int* constraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
266 {
267         BT_PROFILE("expandConstraintRowsInPlace");
268         if (numConstraintRows > numConstraints)
269         {
270                 // we walk the array in reverse to avoid overwriteing
271                 for (int iCon = numConstraints - 1; iCon >= 0; --iCon)
272                 {
273                         const btBatchedConstraintInfo& conInfo = conInfos[iCon];
274                         int iBatch = constraintBatchIds[iCon];
275                         for (int i = conInfo.numConstraintRows - 1; i >= 0; --i)
276                         {
277                                 int iDest = conInfo.constraintIndex + i;
278                                 btAssert(iDest >= iCon);
279                                 btAssert(iDest >= 0 && iDest < numConstraintRows);
280                                 constraintBatchIds[iDest] = iBatch;
281                         }
282                 }
283         }
284 }
285
286 static void expandConstraintRows(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
287 {
288         BT_PROFILE("expandConstraintRows");
289         for (int iCon = 0; iCon < numConstraints; ++iCon)
290         {
291                 const btBatchedConstraintInfo& conInfo = conInfos[iCon];
292                 int iBatch = srcConstraintBatchIds[iCon];
293                 for (int i = 0; i < conInfo.numConstraintRows; ++i)
294                 {
295                         int iDest = conInfo.constraintIndex + i;
296                         btAssert(iDest >= iCon);
297                         btAssert(iDest >= 0 && iDest < numConstraintRows);
298                         destConstraintBatchIds[iDest] = iBatch;
299                 }
300         }
301 }
302
303 struct ExpandConstraintRowsLoop : public btIParallelForBody
304 {
305         int* m_destConstraintBatchIds;
306         const int* m_srcConstraintBatchIds;
307         const btBatchedConstraintInfo* m_conInfos;
308         int m_numConstraintRows;
309
310         ExpandConstraintRowsLoop(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraintRows)
311         {
312                 m_destConstraintBatchIds = destConstraintBatchIds;
313                 m_srcConstraintBatchIds = srcConstraintBatchIds;
314                 m_conInfos = conInfos;
315                 m_numConstraintRows = numConstraintRows;
316         }
317         void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
318         {
319                 expandConstraintRows(m_destConstraintBatchIds, m_srcConstraintBatchIds + iBegin, m_conInfos + iBegin, iEnd - iBegin, m_numConstraintRows);
320         }
321 };
322
323 static void expandConstraintRowsMt(int* destConstraintBatchIds, const int* srcConstraintBatchIds, const btBatchedConstraintInfo* conInfos, int numConstraints, int numConstraintRows)
324 {
325         BT_PROFILE("expandConstraintRowsMt");
326         ExpandConstraintRowsLoop loop(destConstraintBatchIds, srcConstraintBatchIds, conInfos, numConstraintRows);
327         int grainSize = 600;
328         btParallelFor(0, numConstraints, grainSize, loop);
329 }
330
331 static void initBatchedConstraintInfoArray(btAlignedObjectArray<btBatchedConstraintInfo>* outConInfos, btConstraintArray* constraints)
332 {
333         BT_PROFILE("initBatchedConstraintInfoArray");
334         btAlignedObjectArray<btBatchedConstraintInfo>& conInfos = *outConInfos;
335         int numConstraints = constraints->size();
336         conInfos.resizeNoInitialize(numConstraints);
337
338         int newSize = initBatchedConstraintInfo(&outConInfos->at(0), constraints);
339         conInfos.resizeNoInitialize(newSize);
340 }
341
342 static void mergeSmallBatches(btBatchInfo* batches, int iBeginBatch, int iEndBatch, int minBatchSize, int maxBatchSize)
343 {
344         BT_PROFILE("mergeSmallBatches");
345         for (int iBatch = iEndBatch - 1; iBatch >= iBeginBatch; --iBatch)
346         {
347                 btBatchInfo& batch = batches[iBatch];
348                 if (batch.mergeIndex == kNoMerge && batch.numConstraints > 0 && batch.numConstraints < minBatchSize)
349                 {
350                         for (int iDestBatch = iBatch - 1; iDestBatch >= iBeginBatch; --iDestBatch)
351                         {
352                                 btBatchInfo& destBatch = batches[iDestBatch];
353                                 if (destBatch.mergeIndex == kNoMerge && (destBatch.numConstraints + batch.numConstraints) < maxBatchSize)
354                                 {
355                                         destBatch.numConstraints += batch.numConstraints;
356                                         batch.numConstraints = 0;
357                                         batch.mergeIndex = iDestBatch;
358                                         break;
359                                 }
360                         }
361                 }
362         }
363         // flatten mergeIndexes
364         // e.g. in case where A was merged into B and then B was merged into C, we need A to point to C instead of B
365         // Note: loop goes forward through batches because batches always merge from higher indexes to lower,
366         //     so by going from low to high it reduces the amount of trail-following
367         for (int iBatch = iBeginBatch; iBatch < iEndBatch; ++iBatch)
368         {
369                 btBatchInfo& batch = batches[iBatch];
370                 if (batch.mergeIndex != kNoMerge)
371                 {
372                         int iMergeDest = batches[batch.mergeIndex].mergeIndex;
373                         // follow trail of merges to the end
374                         while (iMergeDest != kNoMerge)
375                         {
376                                 int iNext = batches[iMergeDest].mergeIndex;
377                                 if (iNext == kNoMerge)
378                                 {
379                                         batch.mergeIndex = iMergeDest;
380                                         break;
381                                 }
382                                 iMergeDest = iNext;
383                         }
384                 }
385         }
386 }
387
388 static void updateConstraintBatchIdsForMerges(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches)
389 {
390         BT_PROFILE("updateConstraintBatchIdsForMerges");
391         // update batchIds to account for merges
392         for (int i = 0; i < numConstraints; ++i)
393         {
394                 int iBatch = constraintBatchIds[i];
395                 btAssert(iBatch < numBatches);
396                 // if this constraint references a batch that was merged into another batch
397                 if (batches[iBatch].mergeIndex != kNoMerge)
398                 {
399                         // update batchId
400                         constraintBatchIds[i] = batches[iBatch].mergeIndex;
401                 }
402         }
403 }
404
405 struct UpdateConstraintBatchIdsForMergesLoop : public btIParallelForBody
406 {
407         int* m_constraintBatchIds;
408         const btBatchInfo* m_batches;
409         int m_numBatches;
410
411         UpdateConstraintBatchIdsForMergesLoop(int* constraintBatchIds, const btBatchInfo* batches, int numBatches)
412         {
413                 m_constraintBatchIds = constraintBatchIds;
414                 m_batches = batches;
415                 m_numBatches = numBatches;
416         }
417         void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
418         {
419                 BT_PROFILE("UpdateConstraintBatchIdsForMergesLoop");
420                 updateConstraintBatchIdsForMerges(m_constraintBatchIds + iBegin, iEnd - iBegin, m_batches, m_numBatches);
421         }
422 };
423
424 static void updateConstraintBatchIdsForMergesMt(int* constraintBatchIds, int numConstraints, const btBatchInfo* batches, int numBatches)
425 {
426         BT_PROFILE("updateConstraintBatchIdsForMergesMt");
427         UpdateConstraintBatchIdsForMergesLoop loop(constraintBatchIds, batches, numBatches);
428         int grainSize = 800;
429         btParallelFor(0, numConstraints, grainSize, loop);
430 }
431
432 inline bool BatchCompare(const btBatchedConstraints::Range& a, const btBatchedConstraints::Range& b)
433 {
434         int lenA = a.end - a.begin;
435         int lenB = b.end - b.begin;
436         return lenA > lenB;
437 }
438
439 static void writeOutConstraintIndicesForRangeOfBatches(btBatchedConstraints* bc,
440                                                                                                            const int* constraintBatchIds,
441                                                                                                            int numConstraints,
442                                                                                                            int* constraintIdPerBatch,
443                                                                                                            int batchBegin,
444                                                                                                            int batchEnd)
445 {
446         BT_PROFILE("writeOutConstraintIndicesForRangeOfBatches");
447         for (int iCon = 0; iCon < numConstraints; ++iCon)
448         {
449                 int iBatch = constraintBatchIds[iCon];
450                 if (iBatch >= batchBegin && iBatch < batchEnd)
451                 {
452                         int iDestCon = constraintIdPerBatch[iBatch];
453                         constraintIdPerBatch[iBatch] = iDestCon + 1;
454                         bc->m_constraintIndices[iDestCon] = iCon;
455                 }
456         }
457 }
458
459 struct WriteOutConstraintIndicesLoop : public btIParallelForBody
460 {
461         btBatchedConstraints* m_batchedConstraints;
462         const int* m_constraintBatchIds;
463         int m_numConstraints;
464         int* m_constraintIdPerBatch;
465         int m_maxNumBatchesPerPhase;
466
467         WriteOutConstraintIndicesLoop(btBatchedConstraints* bc, const int* constraintBatchIds, int numConstraints, int* constraintIdPerBatch, int maxNumBatchesPerPhase)
468         {
469                 m_batchedConstraints = bc;
470                 m_constraintBatchIds = constraintBatchIds;
471                 m_numConstraints = numConstraints;
472                 m_constraintIdPerBatch = constraintIdPerBatch;
473                 m_maxNumBatchesPerPhase = maxNumBatchesPerPhase;
474         }
475         void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
476         {
477                 BT_PROFILE("WriteOutConstraintIndicesLoop");
478                 int batchBegin = iBegin * m_maxNumBatchesPerPhase;
479                 int batchEnd = iEnd * m_maxNumBatchesPerPhase;
480                 writeOutConstraintIndicesForRangeOfBatches(m_batchedConstraints,
481                                                                                                    m_constraintBatchIds,
482                                                                                                    m_numConstraints,
483                                                                                                    m_constraintIdPerBatch,
484                                                                                                    batchBegin,
485                                                                                                    batchEnd);
486         }
487 };
488
489 static void writeOutConstraintIndicesMt(btBatchedConstraints* bc,
490                                                                                 const int* constraintBatchIds,
491                                                                                 int numConstraints,
492                                                                                 int* constraintIdPerBatch,
493                                                                                 int maxNumBatchesPerPhase,
494                                                                                 int numPhases)
495 {
496         BT_PROFILE("writeOutConstraintIndicesMt");
497         bool inParallel = true;
498         if (inParallel)
499         {
500                 WriteOutConstraintIndicesLoop loop(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase);
501                 btParallelFor(0, numPhases, 1, loop);
502         }
503         else
504         {
505                 for (int iCon = 0; iCon < numConstraints; ++iCon)
506                 {
507                         int iBatch = constraintBatchIds[iCon];
508                         int iDestCon = constraintIdPerBatch[iBatch];
509                         constraintIdPerBatch[iBatch] = iDestCon + 1;
510                         bc->m_constraintIndices[iDestCon] = iCon;
511                 }
512         }
513 }
514
515 static void writeGrainSizes(btBatchedConstraints* bc)
516 {
517         typedef btBatchedConstraints::Range Range;
518         int numPhases = bc->m_phases.size();
519         bc->m_phaseGrainSize.resizeNoInitialize(numPhases);
520         int numThreads = btGetTaskScheduler()->getNumThreads();
521         for (int iPhase = 0; iPhase < numPhases; ++iPhase)
522         {
523                 const Range& phase = bc->m_phases[iPhase];
524                 int numBatches = phase.end - phase.begin;
525                 float grainSize = std::floor((0.25f * numBatches / float(numThreads)) + 0.0f);
526                 bc->m_phaseGrainSize[iPhase] = btMax(1, int(grainSize));
527         }
528 }
529
530 static void writeOutBatches(btBatchedConstraints* bc,
531                                                         const int* constraintBatchIds,
532                                                         int numConstraints,
533                                                         const btBatchInfo* batches,
534                                                         int* batchWork,
535                                                         int maxNumBatchesPerPhase,
536                                                         int numPhases)
537 {
538         BT_PROFILE("writeOutBatches");
539         typedef btBatchedConstraints::Range Range;
540         bc->m_constraintIndices.reserve(numConstraints);
541         bc->m_batches.resizeNoInitialize(0);
542         bc->m_phases.resizeNoInitialize(0);
543
544         //int maxNumBatches = numPhases * maxNumBatchesPerPhase;
545         {
546                 int* constraintIdPerBatch = batchWork;  // for each batch, keep an index into the next available slot in the m_constraintIndices array
547                 int iConstraint = 0;
548                 for (int iPhase = 0; iPhase < numPhases; ++iPhase)
549                 {
550                         int curPhaseBegin = bc->m_batches.size();
551                         int iBegin = iPhase * maxNumBatchesPerPhase;
552                         int iEnd = iBegin + maxNumBatchesPerPhase;
553                         for (int i = iBegin; i < iEnd; ++i)
554                         {
555                                 const btBatchInfo& batch = batches[i];
556                                 int curBatchBegin = iConstraint;
557                                 constraintIdPerBatch[i] = curBatchBegin;  // record the start of each batch in m_constraintIndices array
558                                 int numConstraints = batch.numConstraints;
559                                 iConstraint += numConstraints;
560                                 if (numConstraints > 0)
561                                 {
562                                         bc->m_batches.push_back(Range(curBatchBegin, iConstraint));
563                                 }
564                         }
565                         // if any batches were emitted this phase,
566                         if (bc->m_batches.size() > curPhaseBegin)
567                         {
568                                 // output phase
569                                 bc->m_phases.push_back(Range(curPhaseBegin, bc->m_batches.size()));
570                         }
571                 }
572
573                 btAssert(iConstraint == numConstraints);
574                 bc->m_constraintIndices.resizeNoInitialize(numConstraints);
575                 writeOutConstraintIndicesMt(bc, constraintBatchIds, numConstraints, constraintIdPerBatch, maxNumBatchesPerPhase, numPhases);
576         }
577         // for each phase
578         for (int iPhase = 0; iPhase < bc->m_phases.size(); ++iPhase)
579         {
580                 // sort the batches from largest to smallest (can be helpful to some task schedulers)
581                 const Range& curBatches = bc->m_phases[iPhase];
582                 bc->m_batches.quickSortInternal(BatchCompare, curBatches.begin, curBatches.end - 1);
583         }
584         bc->m_phaseOrder.resize(bc->m_phases.size());
585         for (int i = 0; i < bc->m_phases.size(); ++i)
586         {
587                 bc->m_phaseOrder[i] = i;
588         }
589         writeGrainSizes(bc);
590 }
591
592 //
593 // PreallocatedMemoryHelper -- helper object for allocating a number of chunks of memory in a single contiguous block.
594 //                             It is generally more efficient to do a single larger allocation than many smaller allocations.
595 //
596 // Example Usage:
597 //
598 //  btVector3* bodyPositions = NULL;
599 //  btBatchedConstraintInfo* conInfos = NULL;
600 //  {
601 //    PreallocatedMemoryHelper<8> memHelper;
602 //    memHelper.addChunk( (void**) &bodyPositions, sizeof( btVector3 ) * bodies.size() );
603 //    memHelper.addChunk( (void**) &conInfos, sizeof( btBatchedConstraintInfo ) * numConstraints );
604 //    void* memPtr = malloc( memHelper.getSizeToAllocate() );  // allocate the memory
605 //    memHelper.setChunkPointers( memPtr );  // update pointers to chunks
606 //  }
607 template <int N>
608 class PreallocatedMemoryHelper
609 {
610         struct Chunk
611         {
612                 void** ptr;
613                 size_t size;
614         };
615         Chunk m_chunks[N];
616         int m_numChunks;
617
618 public:
619         PreallocatedMemoryHelper() { m_numChunks = 0; }
620         void addChunk(void** ptr, size_t sz)
621         {
622                 btAssert(m_numChunks < N);
623                 if (m_numChunks < N)
624                 {
625                         Chunk& chunk = m_chunks[m_numChunks];
626                         chunk.ptr = ptr;
627                         chunk.size = sz;
628                         m_numChunks++;
629                 }
630         }
631         size_t getSizeToAllocate() const
632         {
633                 size_t totalSize = 0;
634                 for (int i = 0; i < m_numChunks; ++i)
635                 {
636                         totalSize += m_chunks[i].size;
637                 }
638                 return totalSize;
639         }
640         void setChunkPointers(void* mem) const
641         {
642                 size_t totalSize = 0;
643                 for (int i = 0; i < m_numChunks; ++i)
644                 {
645                         const Chunk& chunk = m_chunks[i];
646                         char* chunkPtr = static_cast<char*>(mem) + totalSize;
647                         *chunk.ptr = chunkPtr;
648                         totalSize += chunk.size;
649                 }
650         }
651 };
652
653 static btVector3 findMaxDynamicConstraintExtent(
654         btVector3* bodyPositions,
655         bool* bodyDynamicFlags,
656         btBatchedConstraintInfo* conInfos,
657         int numConstraints,
658         int numBodies)
659 {
660         BT_PROFILE("findMaxDynamicConstraintExtent");
661         btVector3 consExtent = btVector3(1, 1, 1) * 0.001;
662         for (int iCon = 0; iCon < numConstraints; ++iCon)
663         {
664                 const btBatchedConstraintInfo& con = conInfos[iCon];
665                 int iBody0 = con.bodyIds[0];
666                 int iBody1 = con.bodyIds[1];
667                 btAssert(iBody0 >= 0 && iBody0 < numBodies);
668                 btAssert(iBody1 >= 0 && iBody1 < numBodies);
669                 // is it a dynamic constraint?
670                 if (bodyDynamicFlags[iBody0] && bodyDynamicFlags[iBody1])
671                 {
672                         btVector3 delta = bodyPositions[iBody1] - bodyPositions[iBody0];
673                         consExtent.setMax(delta.absolute());
674                 }
675         }
676         return consExtent;
677 }
678
679 struct btIntVec3
680 {
681         int m_ints[3];
682
683         SIMD_FORCE_INLINE const int& operator[](int i) const { return m_ints[i]; }
684         SIMD_FORCE_INLINE int& operator[](int i) { return m_ints[i]; }
685 };
686
687 struct AssignConstraintsToGridBatchesParams
688 {
689         bool* bodyDynamicFlags;
690         btIntVec3* bodyGridCoords;
691         int numBodies;
692         btBatchedConstraintInfo* conInfos;
693         int* constraintBatchIds;
694         btIntVec3 gridChunkDim;
695         int maxNumBatchesPerPhase;
696         int numPhases;
697         int phaseMask;
698
699         AssignConstraintsToGridBatchesParams()
700         {
701                 memset(this, 0, sizeof(*this));
702         }
703 };
704
705 static void assignConstraintsToGridBatches(const AssignConstraintsToGridBatchesParams& params, int iConBegin, int iConEnd)
706 {
707         BT_PROFILE("assignConstraintsToGridBatches");
708         // (can be done in parallel)
709         for (int iCon = iConBegin; iCon < iConEnd; ++iCon)
710         {
711                 const btBatchedConstraintInfo& con = params.conInfos[iCon];
712                 int iBody0 = con.bodyIds[0];
713                 int iBody1 = con.bodyIds[1];
714                 int iPhase = iCon;  //iBody0; // pseudorandom choice to distribute evenly amongst phases
715                 iPhase &= params.phaseMask;
716                 int gridCoord[3];
717                 // is it a dynamic constraint?
718                 if (params.bodyDynamicFlags[iBody0] && params.bodyDynamicFlags[iBody1])
719                 {
720                         const btIntVec3& body0Coords = params.bodyGridCoords[iBody0];
721                         const btIntVec3& body1Coords = params.bodyGridCoords[iBody1];
722                         // for each dimension x,y,z,
723                         for (int i = 0; i < 3; ++i)
724                         {
725                                 int coordMin = btMin(body0Coords.m_ints[i], body1Coords.m_ints[i]);
726                                 int coordMax = btMax(body0Coords.m_ints[i], body1Coords.m_ints[i]);
727                                 if (coordMin != coordMax)
728                                 {
729                                         btAssert(coordMax == coordMin + 1);
730                                         if ((coordMin & 1) == 0)
731                                         {
732                                                 iPhase &= ~(1 << i);  // force bit off
733                                         }
734                                         else
735                                         {
736                                                 iPhase |= (1 << i);  // force bit on
737                                                 iPhase &= params.phaseMask;
738                                         }
739                                 }
740                                 gridCoord[i] = coordMin;
741                         }
742                 }
743                 else
744                 {
745                         if (!params.bodyDynamicFlags[iBody0])
746                         {
747                                 iBody0 = con.bodyIds[1];
748                         }
749                         btAssert(params.bodyDynamicFlags[iBody0]);
750                         const btIntVec3& body0Coords = params.bodyGridCoords[iBody0];
751                         // for each dimension x,y,z,
752                         for (int i = 0; i < 3; ++i)
753                         {
754                                 gridCoord[i] = body0Coords.m_ints[i];
755                         }
756                 }
757                 // calculate chunk coordinates
758                 int chunkCoord[3];
759                 btIntVec3 gridChunkDim = params.gridChunkDim;
760                 // for each dimension x,y,z,
761                 for (int i = 0; i < 3; ++i)
762                 {
763                         int coordOffset = (iPhase >> i) & 1;
764                         chunkCoord[i] = (gridCoord[i] - coordOffset) / 2;
765                         btClamp(chunkCoord[i], 0, gridChunkDim[i] - 1);
766                         btAssert(chunkCoord[i] < gridChunkDim[i]);
767                 }
768                 int iBatch = iPhase * params.maxNumBatchesPerPhase + chunkCoord[0] + chunkCoord[1] * gridChunkDim[0] + chunkCoord[2] * gridChunkDim[0] * gridChunkDim[1];
769                 btAssert(iBatch >= 0 && iBatch < params.maxNumBatchesPerPhase * params.numPhases);
770                 params.constraintBatchIds[iCon] = iBatch;
771         }
772 }
773
774 struct AssignConstraintsToGridBatchesLoop : public btIParallelForBody
775 {
776         const AssignConstraintsToGridBatchesParams* m_params;
777
778         AssignConstraintsToGridBatchesLoop(const AssignConstraintsToGridBatchesParams& params)
779         {
780                 m_params = &params;
781         }
782         void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
783         {
784                 assignConstraintsToGridBatches(*m_params, iBegin, iEnd);
785         }
786 };
787
788 //
789 // setupSpatialGridBatchesMt -- generate batches using a uniform 3D grid
790 //
791 /*
792
793 Bodies are treated as 3D points at their center of mass. We only consider dynamic bodies at this stage,
794 because only dynamic bodies are mutated when a constraint is solved, thus subject to race conditions.
795
796 1. Compute a bounding box around all dynamic bodies
797 2. Compute the maximum extent of all dynamic constraints. Each dynamic constraint is treated as a line segment, and we need the size of
798    box that will fully enclose any single dynamic constraint
799
800 3. Establish the cell size of our grid, the cell size in each dimension must be at least as large as the dynamic constraints max-extent,
801    so that no dynamic constraint can span more than 2 cells of our grid on any axis of the grid. The cell size should be adjusted
802    larger in order to keep the total number of cells from being excessively high
803
804 Key idea: Given that each constraint spans 1 or 2 grid cells in each dimension, we can handle all constraints by processing
805           in chunks of 2x2x2 cells with 8 different 1-cell offsets ((0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,0,0)...).
806           For each of the 8 offsets, we create a phase, and for each 2x2x2 chunk with dynamic constraints becomes a batch in that phase.
807
808 4. Once the grid is established, we can calculate for each constraint which phase and batch it belongs in.
809
810 5. Do a merge small batches on the batches of each phase separately, to try to even out the sizes of batches
811
812 Optionally, we can "collapse" one dimension of our 3D grid to turn it into a 2D grid, which reduces the number of phases
813 to 4. With fewer phases, there are more constraints per phase and this makes it easier to create batches of a useful size.
814 */
815 //
816 static void setupSpatialGridBatchesMt(
817         btBatchedConstraints* batchedConstraints,
818         btAlignedObjectArray<char>* scratchMemory,
819         btConstraintArray* constraints,
820         const btAlignedObjectArray<btSolverBody>& bodies,
821         int minBatchSize,
822         int maxBatchSize,
823         bool use2DGrid)
824 {
825         BT_PROFILE("setupSpatialGridBatchesMt");
826         const int numPhases = 8;
827         int numConstraints = constraints->size();
828         int numConstraintRows = constraints->size();
829
830         const int maxGridChunkCount = 128;
831         int allocNumBatchesPerPhase = maxGridChunkCount;
832         int minNumBatchesPerPhase = 16;
833         int allocNumBatches = allocNumBatchesPerPhase * numPhases;
834
835         btVector3* bodyPositions = NULL;
836         bool* bodyDynamicFlags = NULL;
837         btIntVec3* bodyGridCoords = NULL;
838         btBatchInfo* batches = NULL;
839         int* batchWork = NULL;
840         btBatchedConstraintInfo* conInfos = NULL;
841         int* constraintBatchIds = NULL;
842         int* constraintRowBatchIds = NULL;
843         {
844                 PreallocatedMemoryHelper<10> memHelper;
845                 memHelper.addChunk((void**)&bodyPositions, sizeof(btVector3) * bodies.size());
846                 memHelper.addChunk((void**)&bodyDynamicFlags, sizeof(bool) * bodies.size());
847                 memHelper.addChunk((void**)&bodyGridCoords, sizeof(btIntVec3) * bodies.size());
848                 memHelper.addChunk((void**)&batches, sizeof(btBatchInfo) * allocNumBatches);
849                 memHelper.addChunk((void**)&batchWork, sizeof(int) * allocNumBatches);
850                 memHelper.addChunk((void**)&conInfos, sizeof(btBatchedConstraintInfo) * numConstraints);
851                 memHelper.addChunk((void**)&constraintBatchIds, sizeof(int) * numConstraints);
852                 memHelper.addChunk((void**)&constraintRowBatchIds, sizeof(int) * numConstraintRows);
853                 size_t scratchSize = memHelper.getSizeToAllocate();
854                 // if we need to reallocate
855                 if (static_cast<size_t>(scratchMemory->capacity()) < scratchSize)
856                 {
857                         // allocate 6.25% extra to avoid repeated reallocs
858                         scratchMemory->reserve(scratchSize + scratchSize / 16);
859                 }
860                 scratchMemory->resizeNoInitialize(scratchSize);
861                 char* memPtr = &scratchMemory->at(0);
862                 memHelper.setChunkPointers(memPtr);
863         }
864
865         numConstraints = initBatchedConstraintInfo(conInfos, constraints);
866
867         // compute bounding box around all dynamic bodies
868         // (could be done in parallel)
869         btVector3 bboxMin(BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT);
870         btVector3 bboxMax = -bboxMin;
871         //int dynamicBodyCount = 0;
872         for (int i = 0; i < bodies.size(); ++i)
873         {
874                 const btSolverBody& body = bodies[i];
875                 btVector3 bodyPos = body.getWorldTransform().getOrigin();
876                 bool isDynamic = (body.internalGetInvMass().x() > btScalar(0));
877                 bodyPositions[i] = bodyPos;
878                 bodyDynamicFlags[i] = isDynamic;
879                 if (isDynamic)
880                 {
881                         //dynamicBodyCount++;
882                         bboxMin.setMin(bodyPos);
883                         bboxMax.setMax(bodyPos);
884                 }
885         }
886
887         // find max extent of all dynamic constraints
888         // (could be done in parallel)
889         btVector3 consExtent = findMaxDynamicConstraintExtent(bodyPositions, bodyDynamicFlags, conInfos, numConstraints, bodies.size());
890
891         btVector3 gridExtent = bboxMax - bboxMin;
892
893         gridExtent.setMax(btVector3(btScalar(1), btScalar(1), btScalar(1)));
894
895         btVector3 gridCellSize = consExtent;
896         int gridDim[3];
897         gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x());
898         gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y());
899         gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z());
900
901         // if we can collapse an axis, it will cut our number of phases in half which could be more efficient
902         int phaseMask = 7;
903         bool collapseAxis = use2DGrid;
904         if (collapseAxis)
905         {
906                 // pick the smallest axis to collapse, leaving us with the greatest number of cells in our grid
907                 int iAxisToCollapse = 0;
908                 int axisDim = gridDim[iAxisToCollapse];
909                 //for each dimension
910                 for (int i = 0; i < 3; ++i)
911                 {
912                         if (gridDim[i] < axisDim)
913                         {
914                                 iAxisToCollapse = i;
915                                 axisDim = gridDim[i];
916                         }
917                 }
918                 // collapse it
919                 gridCellSize[iAxisToCollapse] = gridExtent[iAxisToCollapse] * 2.0f;
920                 phaseMask &= ~(1 << iAxisToCollapse);
921         }
922
923         int numGridChunks = 0;
924         btIntVec3 gridChunkDim;  // each chunk is 2x2x2 group of cells
925         while (true)
926         {
927                 gridDim[0] = int(1.0 + gridExtent.x() / gridCellSize.x());
928                 gridDim[1] = int(1.0 + gridExtent.y() / gridCellSize.y());
929                 gridDim[2] = int(1.0 + gridExtent.z() / gridCellSize.z());
930                 gridChunkDim[0] = btMax(1, (gridDim[0] + 0) / 2);
931                 gridChunkDim[1] = btMax(1, (gridDim[1] + 0) / 2);
932                 gridChunkDim[2] = btMax(1, (gridDim[2] + 0) / 2);
933                 numGridChunks = gridChunkDim[0] * gridChunkDim[1] * gridChunkDim[2];
934                 float nChunks = float(gridChunkDim[0]) * float(gridChunkDim[1]) * float(gridChunkDim[2]);  // suceptible to integer overflow
935                 if (numGridChunks <= maxGridChunkCount && nChunks <= maxGridChunkCount)
936                 {
937                         break;
938                 }
939                 gridCellSize *= 1.25;  // should roughly cut numCells in half
940         }
941         btAssert(numGridChunks <= maxGridChunkCount);
942         int maxNumBatchesPerPhase = numGridChunks;
943
944         // for each dynamic body, compute grid coords
945         btVector3 invGridCellSize = btVector3(1, 1, 1) / gridCellSize;
946         // (can be done in parallel)
947         for (int iBody = 0; iBody < bodies.size(); ++iBody)
948         {
949                 btIntVec3& coords = bodyGridCoords[iBody];
950                 if (bodyDynamicFlags[iBody])
951                 {
952                         btVector3 v = (bodyPositions[iBody] - bboxMin) * invGridCellSize;
953                         coords.m_ints[0] = int(v.x());
954                         coords.m_ints[1] = int(v.y());
955                         coords.m_ints[2] = int(v.z());
956                         btAssert(coords.m_ints[0] >= 0 && coords.m_ints[0] < gridDim[0]);
957                         btAssert(coords.m_ints[1] >= 0 && coords.m_ints[1] < gridDim[1]);
958                         btAssert(coords.m_ints[2] >= 0 && coords.m_ints[2] < gridDim[2]);
959                 }
960                 else
961                 {
962                         coords.m_ints[0] = -1;
963                         coords.m_ints[1] = -1;
964                         coords.m_ints[2] = -1;
965                 }
966         }
967
968         for (int iPhase = 0; iPhase < numPhases; ++iPhase)
969         {
970                 int batchBegin = iPhase * maxNumBatchesPerPhase;
971                 int batchEnd = batchBegin + maxNumBatchesPerPhase;
972                 for (int iBatch = batchBegin; iBatch < batchEnd; ++iBatch)
973                 {
974                         btBatchInfo& batch = batches[iBatch];
975                         batch = btBatchInfo();
976                 }
977         }
978
979         {
980                 AssignConstraintsToGridBatchesParams params;
981                 params.bodyDynamicFlags = bodyDynamicFlags;
982                 params.bodyGridCoords = bodyGridCoords;
983                 params.numBodies = bodies.size();
984                 params.conInfos = conInfos;
985                 params.constraintBatchIds = constraintBatchIds;
986                 params.gridChunkDim = gridChunkDim;
987                 params.maxNumBatchesPerPhase = maxNumBatchesPerPhase;
988                 params.numPhases = numPhases;
989                 params.phaseMask = phaseMask;
990                 bool inParallel = true;
991                 if (inParallel)
992                 {
993                         AssignConstraintsToGridBatchesLoop loop(params);
994                         int grainSize = 250;
995                         btParallelFor(0, numConstraints, grainSize, loop);
996                 }
997                 else
998                 {
999                         assignConstraintsToGridBatches(params, 0, numConstraints);
1000                 }
1001         }
1002         for (int iCon = 0; iCon < numConstraints; ++iCon)
1003         {
1004                 const btBatchedConstraintInfo& con = conInfos[iCon];
1005                 int iBatch = constraintBatchIds[iCon];
1006                 btBatchInfo& batch = batches[iBatch];
1007                 batch.numConstraints += con.numConstraintRows;
1008         }
1009
1010         for (int iPhase = 0; iPhase < numPhases; ++iPhase)
1011         {
1012                 // if phase is legit,
1013                 if (iPhase == (iPhase & phaseMask))
1014                 {
1015                         int iBeginBatch = iPhase * maxNumBatchesPerPhase;
1016                         int iEndBatch = iBeginBatch + maxNumBatchesPerPhase;
1017                         mergeSmallBatches(batches, iBeginBatch, iEndBatch, minBatchSize, maxBatchSize);
1018                 }
1019         }
1020         // all constraints have been assigned a batchId
1021         updateConstraintBatchIdsForMergesMt(constraintBatchIds, numConstraints, batches, maxNumBatchesPerPhase * numPhases);
1022
1023         if (numConstraintRows > numConstraints)
1024         {
1025                 expandConstraintRowsMt(&constraintRowBatchIds[0], &constraintBatchIds[0], &conInfos[0], numConstraints, numConstraintRows);
1026         }
1027         else
1028         {
1029                 constraintRowBatchIds = constraintBatchIds;
1030         }
1031
1032         writeOutBatches(batchedConstraints, constraintRowBatchIds, numConstraintRows, batches, batchWork, maxNumBatchesPerPhase, numPhases);
1033         btAssert(batchedConstraints->validate(constraints, bodies));
1034 }
1035
1036 static void setupSingleBatch(
1037         btBatchedConstraints* bc,
1038         int numConstraints)
1039 {
1040         BT_PROFILE("setupSingleBatch");
1041         typedef btBatchedConstraints::Range Range;
1042
1043         bc->m_constraintIndices.resize(numConstraints);
1044         for (int i = 0; i < numConstraints; ++i)
1045         {
1046                 bc->m_constraintIndices[i] = i;
1047         }
1048
1049         bc->m_batches.resizeNoInitialize(0);
1050         bc->m_phases.resizeNoInitialize(0);
1051         bc->m_phaseOrder.resizeNoInitialize(0);
1052         bc->m_phaseGrainSize.resizeNoInitialize(0);
1053
1054         if (numConstraints > 0)
1055         {
1056                 bc->m_batches.push_back(Range(0, numConstraints));
1057                 bc->m_phases.push_back(Range(0, 1));
1058                 bc->m_phaseOrder.push_back(0);
1059                 bc->m_phaseGrainSize.push_back(1);
1060         }
1061 }
1062
1063 void btBatchedConstraints::setup(
1064         btConstraintArray* constraints,
1065         const btAlignedObjectArray<btSolverBody>& bodies,
1066         BatchingMethod batchingMethod,
1067         int minBatchSize,
1068         int maxBatchSize,
1069         btAlignedObjectArray<char>* scratchMemory)
1070 {
1071         if (constraints->size() >= minBatchSize * 4)
1072         {
1073                 bool use2DGrid = batchingMethod == BATCHING_METHOD_SPATIAL_GRID_2D;
1074                 setupSpatialGridBatchesMt(this, scratchMemory, constraints, bodies, minBatchSize, maxBatchSize, use2DGrid);
1075                 if (s_debugDrawBatches)
1076                 {
1077                         debugDrawAllBatches(this, constraints, bodies);
1078                 }
1079         }
1080         else
1081         {
1082                 setupSingleBatch(this, constraints->size());
1083         }
1084 }