2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
15 ///b3DynamicBvh implementation by Nathanael Presson
17 #include "b3DynamicBvh.h"
20 typedef b3AlignedObjectArray<b3DbvtNode*> b3NodeArray;
21 typedef b3AlignedObjectArray<const b3DbvtNode*> b3ConstNodeArray;
24 struct b3DbvtNodeEnumerator : b3DynamicBvh::ICollide
26 b3ConstNodeArray nodes;
27 void Process(const b3DbvtNode* n) { nodes.push_back(n); }
31 static B3_DBVT_INLINE int b3IndexOf(const b3DbvtNode* node)
33 return (node->parent->childs[1] == node);
37 static B3_DBVT_INLINE b3DbvtVolume b3Merge(const b3DbvtVolume& a,
38 const b3DbvtVolume& b)
40 #if (B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE)
41 B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtAabbMm)]);
42 b3DbvtVolume& res = *(b3DbvtVolume*)locals;
50 // volume+edge lengths
51 static B3_DBVT_INLINE b3Scalar b3Size(const b3DbvtVolume& a)
53 const b3Vector3 edges = a.Lengths();
54 return (edges.x * edges.y * edges.z +
55 edges.x + edges.y + edges.z);
59 static void b3GetMaxDepth(const b3DbvtNode* node, int depth, int& maxdepth)
61 if (node->isinternal())
63 b3GetMaxDepth(node->childs[0], depth + 1, maxdepth);
64 b3GetMaxDepth(node->childs[1], depth + 1, maxdepth);
67 maxdepth = b3Max(maxdepth, depth);
71 static B3_DBVT_INLINE void b3DeleteNode(b3DynamicBvh* pdbvt,
74 b3AlignedFree(pdbvt->m_free);
79 static void b3RecurseDeleteNode(b3DynamicBvh* pdbvt,
84 b3RecurseDeleteNode(pdbvt, node->childs[0]);
85 b3RecurseDeleteNode(pdbvt, node->childs[1]);
87 if (node == pdbvt->m_root) pdbvt->m_root = 0;
88 b3DeleteNode(pdbvt, node);
92 static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
104 node = new (b3AlignedAlloc(sizeof(b3DbvtNode), 16)) b3DbvtNode();
106 node->parent = parent;
113 static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
115 const b3DbvtVolume& volume,
118 b3DbvtNode* node = b3CreateNode(pdbvt, parent, data);
119 node->volume = volume;
124 static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
126 const b3DbvtVolume& volume0,
127 const b3DbvtVolume& volume1,
130 b3DbvtNode* node = b3CreateNode(pdbvt, parent, data);
131 b3Merge(volume0, volume1, node->volume);
136 static void b3InsertLeaf(b3DynamicBvh* pdbvt,
142 pdbvt->m_root = leaf;
151 root = root->childs[b3Select(leaf->volume,
152 root->childs[0]->volume,
153 root->childs[1]->volume)];
154 } while (!root->isleaf());
156 b3DbvtNode* prev = root->parent;
157 b3DbvtNode* node = b3CreateNode(pdbvt, prev, leaf->volume, root->volume, 0);
160 prev->childs[b3IndexOf(root)] = node;
161 node->childs[0] = root;
163 node->childs[1] = leaf;
167 if (!prev->volume.Contain(node->volume))
168 b3Merge(prev->childs[0]->volume, prev->childs[1]->volume, prev->volume);
172 } while (0 != (prev = node->parent));
176 node->childs[0] = root;
178 node->childs[1] = leaf;
180 pdbvt->m_root = node;
186 static b3DbvtNode* b3RemoveLeaf(b3DynamicBvh* pdbvt,
189 if (leaf == pdbvt->m_root)
196 b3DbvtNode* parent = leaf->parent;
197 b3DbvtNode* prev = parent->parent;
198 b3DbvtNode* sibling = parent->childs[1 - b3IndexOf(leaf)];
201 prev->childs[b3IndexOf(parent)] = sibling;
202 sibling->parent = prev;
203 b3DeleteNode(pdbvt, parent);
206 const b3DbvtVolume pb = prev->volume;
207 b3Merge(prev->childs[0]->volume, prev->childs[1]->volume, prev->volume);
208 if (b3NotEqual(pb, prev->volume))
215 return (prev ? prev : pdbvt->m_root);
219 pdbvt->m_root = sibling;
221 b3DeleteNode(pdbvt, parent);
222 return (pdbvt->m_root);
228 static void b3FetchLeaves(b3DynamicBvh* pdbvt,
233 if (root->isinternal() && depth)
235 b3FetchLeaves(pdbvt, root->childs[0], leaves, depth - 1);
236 b3FetchLeaves(pdbvt, root->childs[1], leaves, depth - 1);
237 b3DeleteNode(pdbvt, root);
241 leaves.push_back(root);
245 static bool b3LeftOfAxis(const b3DbvtNode* node,
246 const b3Vector3& org,
247 const b3Vector3& axis)
249 return b3Dot(axis, node->volume.Center() - org) <= 0;
252 // Partitions leaves such that leaves[0, n) are on the
253 // left of axis, and leaves[n, count) are on the right
254 // of axis. returns N.
255 static int b3Split(b3DbvtNode** leaves,
257 const b3Vector3& org,
258 const b3Vector3& axis)
264 while (begin != end && b3LeftOfAxis(leaves[begin], org, axis))
274 while (begin != end && !b3LeftOfAxis(leaves[end - 1], org, axis))
284 // swap out of place nodes
286 b3DbvtNode* temp = leaves[begin];
287 leaves[begin] = leaves[end];
296 static b3DbvtVolume b3Bounds(b3DbvtNode** leaves,
299 #if B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE
300 B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtVolume)]);
301 b3DbvtVolume& volume = *(b3DbvtVolume*)locals;
302 volume = leaves[0]->volume;
304 b3DbvtVolume volume = leaves[0]->volume;
306 for (int i = 1, ni = count; i < ni; ++i)
308 b3Merge(volume, leaves[i]->volume, volume);
314 static void b3BottomUp(b3DynamicBvh* pdbvt,
320 b3Scalar minsize = B3_INFINITY;
321 int minidx[2] = {-1, -1};
322 for (int i = 0; i < count; ++i)
324 for (int j = i + 1; j < count; ++j)
326 const b3Scalar sz = b3Size(b3Merge(leaves[i]->volume, leaves[j]->volume));
335 b3DbvtNode* n[] = {leaves[minidx[0]], leaves[minidx[1]]};
336 b3DbvtNode* p = b3CreateNode(pdbvt, 0, n[0]->volume, n[1]->volume, 0);
341 leaves[minidx[0]] = p;
342 leaves[minidx[1]] = leaves[count - 1];
348 static b3DbvtNode* b3TopDown(b3DynamicBvh* pdbvt,
353 static const b3Vector3 axis[] = {b3MakeVector3(1, 0, 0),
354 b3MakeVector3(0, 1, 0),
355 b3MakeVector3(0, 0, 1)};
356 b3Assert(bu_treshold > 1);
359 if (count > bu_treshold)
361 const b3DbvtVolume vol = b3Bounds(leaves, count);
362 const b3Vector3 org = vol.Center();
365 int bestmidp = count;
366 int splitcount[3][2] = {{0, 0}, {0, 0}, {0, 0}};
368 for (i = 0; i < count; ++i)
370 const b3Vector3 x = leaves[i]->volume.Center() - org;
371 for (int j = 0; j < 3; ++j)
373 ++splitcount[j][b3Dot(x, axis[j]) > 0 ? 1 : 0];
376 for (i = 0; i < 3; ++i)
378 if ((splitcount[i][0] > 0) && (splitcount[i][1] > 0))
380 const int midp = (int)b3Fabs(b3Scalar(splitcount[i][0] - splitcount[i][1]));
390 partition = b3Split(leaves, count, org, axis[bestaxis]);
391 b3Assert(partition != 0 && partition != count);
395 partition = count / 2 + 1;
397 b3DbvtNode* node = b3CreateNode(pdbvt, 0, vol, 0);
398 node->childs[0] = b3TopDown(pdbvt, &leaves[0], partition, bu_treshold);
399 node->childs[1] = b3TopDown(pdbvt, &leaves[partition], count - partition, bu_treshold);
400 node->childs[0]->parent = node;
401 node->childs[1]->parent = node;
406 b3BottomUp(pdbvt, leaves, count);
414 static B3_DBVT_INLINE b3DbvtNode* b3Sort(b3DbvtNode* n, b3DbvtNode*& r)
416 b3DbvtNode* p = n->parent;
417 b3Assert(n->isinternal());
420 const int i = b3IndexOf(n);
422 b3DbvtNode* s = p->childs[j];
423 b3DbvtNode* q = p->parent;
424 b3Assert(n == p->childs[i]);
426 q->childs[b3IndexOf(p)] = n;
432 p->childs[0] = n->childs[0];
433 p->childs[1] = n->childs[1];
434 n->childs[0]->parent = p;
435 n->childs[1]->parent = p;
438 b3Swap(p->volume, n->volume);
445 static B3_DBVT_INLINE b3DbvtNode* walkup(b3DbvtNode* n,int count)
447 while(n&&(count--)) n=n->parent;
457 b3DynamicBvh::b3DynamicBvh()
467 b3DynamicBvh::~b3DynamicBvh()
473 void b3DynamicBvh::clear()
476 b3RecurseDeleteNode(this, m_root);
477 b3AlignedFree(m_free);
485 void b3DynamicBvh::optimizeBottomUp()
490 leaves.reserve(m_leaves);
491 b3FetchLeaves(this, m_root, leaves);
492 b3BottomUp(this, &leaves[0], leaves.size());
498 void b3DynamicBvh::optimizeTopDown(int bu_treshold)
503 leaves.reserve(m_leaves);
504 b3FetchLeaves(this, m_root, leaves);
505 m_root = b3TopDown(this, &leaves[0], leaves.size(), bu_treshold);
510 void b3DynamicBvh::optimizeIncremental(int passes)
512 if (passes < 0) passes = m_leaves;
513 if (m_root && (passes > 0))
517 b3DbvtNode* node = m_root;
519 while (node->isinternal())
521 node = b3Sort(node, m_root)->childs[(m_opath >> bit) & 1];
522 bit = (bit + 1) & (sizeof(unsigned) * 8 - 1);
531 b3DbvtNode* b3DynamicBvh::insert(const b3DbvtVolume& volume, void* data)
533 b3DbvtNode* leaf = b3CreateNode(this, 0, volume, data);
534 b3InsertLeaf(this, m_root, leaf);
540 void b3DynamicBvh::update(b3DbvtNode* leaf, int lookahead)
542 b3DbvtNode* root = b3RemoveLeaf(this, leaf);
547 for (int i = 0; (i < lookahead) && root->parent; ++i)
555 b3InsertLeaf(this, root, leaf);
559 void b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume)
561 b3DbvtNode* root = b3RemoveLeaf(this, leaf);
566 for (int i = 0; (i < m_lkhd) && root->parent; ++i)
574 leaf->volume = volume;
575 b3InsertLeaf(this, root, leaf);
579 bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity, b3Scalar margin)
581 if (leaf->volume.Contain(volume)) return (false);
582 volume.Expand(b3MakeVector3(margin, margin, margin));
583 volume.SignedExpand(velocity);
584 update(leaf, volume);
589 bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity)
591 if (leaf->volume.Contain(volume)) return (false);
592 volume.SignedExpand(velocity);
593 update(leaf, volume);
598 bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, b3Scalar margin)
600 if (leaf->volume.Contain(volume)) return (false);
601 volume.Expand(b3MakeVector3(margin, margin, margin));
602 update(leaf, volume);
607 void b3DynamicBvh::remove(b3DbvtNode* leaf)
609 b3RemoveLeaf(this, leaf);
610 b3DeleteNode(this, leaf);
615 void b3DynamicBvh::write(IWriter* iwriter) const
617 b3DbvtNodeEnumerator nodes;
618 nodes.nodes.reserve(m_leaves * 2);
619 enumNodes(m_root, nodes);
620 iwriter->Prepare(m_root, nodes.nodes.size());
621 for (int i = 0; i < nodes.nodes.size(); ++i)
623 const b3DbvtNode* n = nodes.nodes[i];
625 if (n->parent) p = nodes.nodes.findLinearSearch(n->parent);
628 const int c0 = nodes.nodes.findLinearSearch(n->childs[0]);
629 const int c1 = nodes.nodes.findLinearSearch(n->childs[1]);
630 iwriter->WriteNode(n, i, p, c0, c1);
634 iwriter->WriteLeaf(n, i, p);
640 void b3DynamicBvh::clone(b3DynamicBvh& dest, IClone* iclone) const
645 b3AlignedObjectArray<sStkCLN> stack;
646 stack.reserve(m_leaves);
647 stack.push_back(sStkCLN(m_root, 0));
650 const int i = stack.size() - 1;
651 const sStkCLN e = stack[i];
652 b3DbvtNode* n = b3CreateNode(&dest, e.parent, e.node->volume, e.node->data);
655 e.parent->childs[i & 1] = n;
658 if (e.node->isinternal())
660 stack.push_back(sStkCLN(e.node->childs[0], n));
661 stack.push_back(sStkCLN(e.node->childs[1], n));
665 iclone->CloneLeaf(n);
667 } while (stack.size() > 0);
672 int b3DynamicBvh::maxdepth(const b3DbvtNode* node)
675 if (node) b3GetMaxDepth(node, 1, depth);
680 int b3DynamicBvh::countLeaves(const b3DbvtNode* node)
682 if (node->isinternal())
683 return (countLeaves(node->childs[0]) + countLeaves(node->childs[1]));
689 void b3DynamicBvh::extractLeaves(const b3DbvtNode* node, b3AlignedObjectArray<const b3DbvtNode*>& leaves)
691 if (node->isinternal())
693 extractLeaves(node->childs[0], leaves);
694 extractLeaves(node->childs[1], leaves);
698 leaves.push_back(node);
703 #if B3_DBVT_ENABLE_BENCHMARK
711 /Ox /Ob2 /Oi /Ot /I "." /I "..\.." /I "..\..\src" /D "NDEBUG" /D "_LIB" /D "_WINDOWS" /D "_CRT_SECURE_NO_DEPRECATE" /D "_CRT_NONSTDC_NO_DEPRECATE" /D "WIN32"
712 /GF /FD /MT /GS- /Gy /arch:SSE2 /Zc:wchar_t- /Fp"..\..\out\release8\build\libbulletcollision\libbulletcollision.pch"
713 /Fo"..\..\out\release8\build\libbulletcollision\\"
714 /Fd"..\..\out\release8\build\libbulletcollision\bulletcollision.pdb"
715 /W3 /nologo /c /Wp64 /Zi /errorReport:prompt
718 World scale: 100.000000
719 Extents base: 1.000000
720 Extents range: 4.000000
722 sizeof(b3DbvtVolume): 32 bytes
723 sizeof(b3DbvtNode): 44 bytes
724 [1] b3DbvtVolume intersections: 3499 ms (-1%)
725 [2] b3DbvtVolume merges: 1934 ms (0%)
726 [3] b3DynamicBvh::collideTT: 5485 ms (-21%)
727 [4] b3DynamicBvh::collideTT self: 2814 ms (-20%)
728 [5] b3DynamicBvh::collideTT xform: 7379 ms (-1%)
729 [6] b3DynamicBvh::collideTT xform,self: 7270 ms (-2%)
730 [7] b3DynamicBvh::rayTest: 6314 ms (0%),(332143 r/s)
731 [8] insert/remove: 2093 ms (0%),(1001983 ir/s)
732 [9] updates (teleport): 1879 ms (-3%),(1116100 u/s)
733 [10] updates (jitter): 1244 ms (-4%),(1685813 u/s)
734 [11] optimize (incremental): 2514 ms (0%),(1668000 o/s)
735 [12] b3DbvtVolume notequal: 3659 ms (0%)
736 [13] culling(OCL+fullsort): 2218 ms (0%),(461 t/s)
737 [14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
738 [15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
739 [16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
740 [17] b3DbvtVolume select: 3419 ms (0%)
743 struct b3DbvtBenchmark
745 struct NilPolicy : b3DynamicBvh::ICollide
747 NilPolicy() : m_pcount(0), m_depth(-B3_INFINITY), m_checksort(true) {}
748 void Process(const b3DbvtNode*, const b3DbvtNode*) { ++m_pcount; }
749 void Process(const b3DbvtNode*) { ++m_pcount; }
750 void Process(const b3DbvtNode*, b3Scalar depth)
755 if (depth >= m_depth)
758 printf("wrong depth: %f (should be >= %f)\r\n", depth, m_depth);
765 struct P14 : b3DynamicBvh::ICollide
769 const b3DbvtNode* leaf;
772 void Process(const b3DbvtNode* leaf, b3Scalar depth)
778 static int sortfnc(const Node& a, const Node& b)
780 if (a.depth < b.depth) return (+1);
781 if (a.depth > b.depth) return (-1);
784 b3AlignedObjectArray<Node> m_nodes;
786 struct P15 : b3DynamicBvh::ICollide
790 const b3DbvtNode* leaf;
793 void Process(const b3DbvtNode* leaf)
797 n.depth = dot(leaf->volume.Center(), m_axis);
799 static int sortfnc(const Node& a, const Node& b)
801 if (a.depth < b.depth) return (+1);
802 if (a.depth > b.depth) return (-1);
805 b3AlignedObjectArray<Node> m_nodes;
808 static b3Scalar RandUnit()
810 return (rand() / (b3Scalar)RAND_MAX);
812 static b3Vector3 RandVector3()
814 return (b3Vector3(RandUnit(), RandUnit(), RandUnit()));
816 static b3Vector3 RandVector3(b3Scalar cs)
818 return (RandVector3() * cs - b3Vector3(cs, cs, cs) / 2);
820 static b3DbvtVolume RandVolume(b3Scalar cs, b3Scalar eb, b3Scalar es)
822 return (b3DbvtVolume::FromCE(RandVector3(cs), b3Vector3(eb, eb, eb) + RandVector3() * es));
824 static b3Transform RandTransform(b3Scalar cs)
827 t.setOrigin(RandVector3(cs));
828 t.setRotation(b3Quaternion(RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2).normalized());
831 static void RandTree(b3Scalar cs, b3Scalar eb, b3Scalar es, int leaves, b3DynamicBvh& dbvt)
834 for (int i = 0; i < leaves; ++i)
836 dbvt.insert(RandVolume(cs, eb, es), 0);
841 void b3DynamicBvh::benchmark()
843 static const b3Scalar cfgVolumeCenterScale = 100;
844 static const b3Scalar cfgVolumeExentsBase = 1;
845 static const b3Scalar cfgVolumeExentsScale = 4;
846 static const int cfgLeaves = 8192;
847 static const bool cfgEnable = true;
849 //[1] b3DbvtVolume intersections
850 bool cfgBenchmark1_Enable = cfgEnable;
851 static const int cfgBenchmark1_Iterations = 8;
852 static const int cfgBenchmark1_Reference = 3499;
853 //[2] b3DbvtVolume merges
854 bool cfgBenchmark2_Enable = cfgEnable;
855 static const int cfgBenchmark2_Iterations = 4;
856 static const int cfgBenchmark2_Reference = 1945;
857 //[3] b3DynamicBvh::collideTT
858 bool cfgBenchmark3_Enable = cfgEnable;
859 static const int cfgBenchmark3_Iterations = 512;
860 static const int cfgBenchmark3_Reference = 5485;
861 //[4] b3DynamicBvh::collideTT self
862 bool cfgBenchmark4_Enable = cfgEnable;
863 static const int cfgBenchmark4_Iterations = 512;
864 static const int cfgBenchmark4_Reference = 2814;
865 //[5] b3DynamicBvh::collideTT xform
866 bool cfgBenchmark5_Enable = cfgEnable;
867 static const int cfgBenchmark5_Iterations = 512;
868 static const b3Scalar cfgBenchmark5_OffsetScale = 2;
869 static const int cfgBenchmark5_Reference = 7379;
870 //[6] b3DynamicBvh::collideTT xform,self
871 bool cfgBenchmark6_Enable = cfgEnable;
872 static const int cfgBenchmark6_Iterations = 512;
873 static const b3Scalar cfgBenchmark6_OffsetScale = 2;
874 static const int cfgBenchmark6_Reference = 7270;
875 //[7] b3DynamicBvh::rayTest
876 bool cfgBenchmark7_Enable = cfgEnable;
877 static const int cfgBenchmark7_Passes = 32;
878 static const int cfgBenchmark7_Iterations = 65536;
879 static const int cfgBenchmark7_Reference = 6307;
881 bool cfgBenchmark8_Enable = cfgEnable;
882 static const int cfgBenchmark8_Passes = 32;
883 static const int cfgBenchmark8_Iterations = 65536;
884 static const int cfgBenchmark8_Reference = 2105;
885 //[9] updates (teleport)
886 bool cfgBenchmark9_Enable = cfgEnable;
887 static const int cfgBenchmark9_Passes = 32;
888 static const int cfgBenchmark9_Iterations = 65536;
889 static const int cfgBenchmark9_Reference = 1879;
890 //[10] updates (jitter)
891 bool cfgBenchmark10_Enable = cfgEnable;
892 static const b3Scalar cfgBenchmark10_Scale = cfgVolumeCenterScale / 10000;
893 static const int cfgBenchmark10_Passes = 32;
894 static const int cfgBenchmark10_Iterations = 65536;
895 static const int cfgBenchmark10_Reference = 1244;
896 //[11] optimize (incremental)
897 bool cfgBenchmark11_Enable = cfgEnable;
898 static const int cfgBenchmark11_Passes = 64;
899 static const int cfgBenchmark11_Iterations = 65536;
900 static const int cfgBenchmark11_Reference = 2510;
901 //[12] b3DbvtVolume notequal
902 bool cfgBenchmark12_Enable = cfgEnable;
903 static const int cfgBenchmark12_Iterations = 32;
904 static const int cfgBenchmark12_Reference = 3677;
905 //[13] culling(OCL+fullsort)
906 bool cfgBenchmark13_Enable = cfgEnable;
907 static const int cfgBenchmark13_Iterations = 1024;
908 static const int cfgBenchmark13_Reference = 2231;
909 //[14] culling(OCL+qsort)
910 bool cfgBenchmark14_Enable = cfgEnable;
911 static const int cfgBenchmark14_Iterations = 8192;
912 static const int cfgBenchmark14_Reference = 3500;
913 //[15] culling(KDOP+qsort)
914 bool cfgBenchmark15_Enable = cfgEnable;
915 static const int cfgBenchmark15_Iterations = 8192;
916 static const int cfgBenchmark15_Reference = 1151;
917 //[16] insert/remove batch
918 bool cfgBenchmark16_Enable = cfgEnable;
919 static const int cfgBenchmark16_BatchCount = 256;
920 static const int cfgBenchmark16_Passes = 16384;
921 static const int cfgBenchmark16_Reference = 5138;
923 bool cfgBenchmark17_Enable = cfgEnable;
924 static const int cfgBenchmark17_Iterations = 4;
925 static const int cfgBenchmark17_Reference = 3390;
928 printf("Benchmarking dbvt...\r\n");
929 printf("\tWorld scale: %f\r\n", cfgVolumeCenterScale);
930 printf("\tExtents base: %f\r\n", cfgVolumeExentsBase);
931 printf("\tExtents range: %f\r\n", cfgVolumeExentsScale);
932 printf("\tLeaves: %u\r\n", cfgLeaves);
933 printf("\tsizeof(b3DbvtVolume): %u bytes\r\n", sizeof(b3DbvtVolume));
934 printf("\tsizeof(b3DbvtNode): %u bytes\r\n", sizeof(b3DbvtNode));
935 if (cfgBenchmark1_Enable)
938 b3AlignedObjectArray<b3DbvtVolume> volumes;
939 b3AlignedObjectArray<bool> results;
940 volumes.resize(cfgLeaves);
941 results.resize(cfgLeaves);
942 for (int i = 0; i < cfgLeaves; ++i)
944 volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
946 printf("[1] b3DbvtVolume intersections: ");
948 for (int i = 0; i < cfgBenchmark1_Iterations; ++i)
950 for (int j = 0; j < cfgLeaves; ++j)
952 for (int k = 0; k < cfgLeaves; ++k)
954 results[k] = Intersect(volumes[j], volumes[k]);
958 const int time = (int)wallclock.getTimeMilliseconds();
959 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark1_Reference) * 100 / time);
961 if (cfgBenchmark2_Enable)
964 b3AlignedObjectArray<b3DbvtVolume> volumes;
965 b3AlignedObjectArray<b3DbvtVolume> results;
966 volumes.resize(cfgLeaves);
967 results.resize(cfgLeaves);
968 for (int i = 0; i < cfgLeaves; ++i)
970 volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
972 printf("[2] b3DbvtVolume merges: ");
974 for (int i = 0; i < cfgBenchmark2_Iterations; ++i)
976 for (int j = 0; j < cfgLeaves; ++j)
978 for (int k = 0; k < cfgLeaves; ++k)
980 Merge(volumes[j], volumes[k], results[k]);
984 const int time = (int)wallclock.getTimeMilliseconds();
985 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark2_Reference) * 100 / time);
987 if (cfgBenchmark3_Enable)
990 b3DynamicBvh dbvt[2];
991 b3DbvtBenchmark::NilPolicy policy;
992 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
993 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
994 dbvt[0].optimizeTopDown();
995 dbvt[1].optimizeTopDown();
996 printf("[3] b3DynamicBvh::collideTT: ");
998 for (int i = 0; i < cfgBenchmark3_Iterations; ++i)
1000 b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, policy);
1002 const int time = (int)wallclock.getTimeMilliseconds();
1003 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark3_Reference) * 100 / time);
1005 if (cfgBenchmark4_Enable)
1009 b3DbvtBenchmark::NilPolicy policy;
1010 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1011 dbvt.optimizeTopDown();
1012 printf("[4] b3DynamicBvh::collideTT self: ");
1014 for (int i = 0; i < cfgBenchmark4_Iterations; ++i)
1016 b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, policy);
1018 const int time = (int)wallclock.getTimeMilliseconds();
1019 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark4_Reference) * 100 / time);
1021 if (cfgBenchmark5_Enable)
1024 b3DynamicBvh dbvt[2];
1025 b3AlignedObjectArray<b3Transform> transforms;
1026 b3DbvtBenchmark::NilPolicy policy;
1027 transforms.resize(cfgBenchmark5_Iterations);
1028 for (int i = 0; i < transforms.size(); ++i)
1030 transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark5_OffsetScale);
1032 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
1033 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
1034 dbvt[0].optimizeTopDown();
1035 dbvt[1].optimizeTopDown();
1036 printf("[5] b3DynamicBvh::collideTT xform: ");
1038 for (int i = 0; i < cfgBenchmark5_Iterations; ++i)
1040 b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, transforms[i], policy);
1042 const int time = (int)wallclock.getTimeMilliseconds();
1043 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark5_Reference) * 100 / time);
1045 if (cfgBenchmark6_Enable)
1049 b3AlignedObjectArray<b3Transform> transforms;
1050 b3DbvtBenchmark::NilPolicy policy;
1051 transforms.resize(cfgBenchmark6_Iterations);
1052 for (int i = 0; i < transforms.size(); ++i)
1054 transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark6_OffsetScale);
1056 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1057 dbvt.optimizeTopDown();
1058 printf("[6] b3DynamicBvh::collideTT xform,self: ");
1060 for (int i = 0; i < cfgBenchmark6_Iterations; ++i)
1062 b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, transforms[i], policy);
1064 const int time = (int)wallclock.getTimeMilliseconds();
1065 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark6_Reference) * 100 / time);
1067 if (cfgBenchmark7_Enable)
1071 b3AlignedObjectArray<b3Vector3> rayorg;
1072 b3AlignedObjectArray<b3Vector3> raydir;
1073 b3DbvtBenchmark::NilPolicy policy;
1074 rayorg.resize(cfgBenchmark7_Iterations);
1075 raydir.resize(cfgBenchmark7_Iterations);
1076 for (int i = 0; i < rayorg.size(); ++i)
1078 rayorg[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
1079 raydir[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
1081 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1082 dbvt.optimizeTopDown();
1083 printf("[7] b3DynamicBvh::rayTest: ");
1085 for (int i = 0; i < cfgBenchmark7_Passes; ++i)
1087 for (int j = 0; j < cfgBenchmark7_Iterations; ++j)
1089 b3DynamicBvh::rayTest(dbvt.m_root, rayorg[j], rayorg[j] + raydir[j], policy);
1092 const int time = (int)wallclock.getTimeMilliseconds();
1093 unsigned rays = cfgBenchmark7_Passes * cfgBenchmark7_Iterations;
1094 printf("%u ms (%i%%),(%u r/s)\r\n", time, (time - cfgBenchmark7_Reference) * 100 / time, (rays * 1000) / time);
1096 if (cfgBenchmark8_Enable)
1100 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1101 dbvt.optimizeTopDown();
1102 printf("[8] insert/remove: ");
1104 for (int i = 0; i < cfgBenchmark8_Passes; ++i)
1106 for (int j = 0; j < cfgBenchmark8_Iterations; ++j)
1108 dbvt.remove(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
1111 const int time = (int)wallclock.getTimeMilliseconds();
1112 const int ir = cfgBenchmark8_Passes * cfgBenchmark8_Iterations;
1113 printf("%u ms (%i%%),(%u ir/s)\r\n", time, (time - cfgBenchmark8_Reference) * 100 / time, ir * 1000 / time);
1115 if (cfgBenchmark9_Enable)
1119 b3AlignedObjectArray<const b3DbvtNode*> leaves;
1120 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1121 dbvt.optimizeTopDown();
1122 dbvt.extractLeaves(dbvt.m_root, leaves);
1123 printf("[9] updates (teleport): ");
1125 for (int i = 0; i < cfgBenchmark9_Passes; ++i)
1127 for (int j = 0; j < cfgBenchmark9_Iterations; ++j)
1129 dbvt.update(const_cast<b3DbvtNode*>(leaves[rand() % cfgLeaves]),
1130 b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale));
1133 const int time = (int)wallclock.getTimeMilliseconds();
1134 const int up = cfgBenchmark9_Passes * cfgBenchmark9_Iterations;
1135 printf("%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark9_Reference) * 100 / time, up * 1000 / time);
1137 if (cfgBenchmark10_Enable)
1141 b3AlignedObjectArray<const b3DbvtNode*> leaves;
1142 b3AlignedObjectArray<b3Vector3> vectors;
1143 vectors.resize(cfgBenchmark10_Iterations);
1144 for (int i = 0; i < vectors.size(); ++i)
1146 vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)) * cfgBenchmark10_Scale;
1148 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1149 dbvt.optimizeTopDown();
1150 dbvt.extractLeaves(dbvt.m_root, leaves);
1151 printf("[10] updates (jitter): ");
1154 for (int i = 0; i < cfgBenchmark10_Passes; ++i)
1156 for (int j = 0; j < cfgBenchmark10_Iterations; ++j)
1158 const b3Vector3& d = vectors[j];
1159 b3DbvtNode* l = const_cast<b3DbvtNode*>(leaves[rand() % cfgLeaves]);
1160 b3DbvtVolume v = b3DbvtVolume::FromMM(l->volume.Mins() + d, l->volume.Maxs() + d);
1164 const int time = (int)wallclock.getTimeMilliseconds();
1165 const int up = cfgBenchmark10_Passes * cfgBenchmark10_Iterations;
1166 printf("%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark10_Reference) * 100 / time, up * 1000 / time);
1168 if (cfgBenchmark11_Enable)
1172 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1173 dbvt.optimizeTopDown();
1174 printf("[11] optimize (incremental): ");
1176 for (int i = 0; i < cfgBenchmark11_Passes; ++i)
1178 dbvt.optimizeIncremental(cfgBenchmark11_Iterations);
1180 const int time = (int)wallclock.getTimeMilliseconds();
1181 const int op = cfgBenchmark11_Passes * cfgBenchmark11_Iterations;
1182 printf("%u ms (%i%%),(%u o/s)\r\n", time, (time - cfgBenchmark11_Reference) * 100 / time, op / time * 1000);
1184 if (cfgBenchmark12_Enable)
1187 b3AlignedObjectArray<b3DbvtVolume> volumes;
1188 b3AlignedObjectArray<bool> results;
1189 volumes.resize(cfgLeaves);
1190 results.resize(cfgLeaves);
1191 for (int i = 0; i < cfgLeaves; ++i)
1193 volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
1195 printf("[12] b3DbvtVolume notequal: ");
1197 for (int i = 0; i < cfgBenchmark12_Iterations; ++i)
1199 for (int j = 0; j < cfgLeaves; ++j)
1201 for (int k = 0; k < cfgLeaves; ++k)
1203 results[k] = NotEqual(volumes[j], volumes[k]);
1207 const int time = (int)wallclock.getTimeMilliseconds();
1208 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark12_Reference) * 100 / time);
1210 if (cfgBenchmark13_Enable)
1214 b3AlignedObjectArray<b3Vector3> vectors;
1215 b3DbvtBenchmark::NilPolicy policy;
1216 vectors.resize(cfgBenchmark13_Iterations);
1217 for (int i = 0; i < vectors.size(); ++i)
1219 vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
1221 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1222 dbvt.optimizeTopDown();
1223 printf("[13] culling(OCL+fullsort): ");
1225 for (int i = 0; i < cfgBenchmark13_Iterations; ++i)
1227 static const b3Scalar offset = 0;
1228 policy.m_depth = -B3_INFINITY;
1229 dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy);
1231 const int time = (int)wallclock.getTimeMilliseconds();
1232 const int t = cfgBenchmark13_Iterations;
1233 printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark13_Reference) * 100 / time, (t * 1000) / time);
1235 if (cfgBenchmark14_Enable)
1239 b3AlignedObjectArray<b3Vector3> vectors;
1240 b3DbvtBenchmark::P14 policy;
1241 vectors.resize(cfgBenchmark14_Iterations);
1242 for (int i = 0; i < vectors.size(); ++i)
1244 vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
1246 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1247 dbvt.optimizeTopDown();
1248 policy.m_nodes.reserve(cfgLeaves);
1249 printf("[14] culling(OCL+qsort): ");
1251 for (int i = 0; i < cfgBenchmark14_Iterations; ++i)
1253 static const b3Scalar offset = 0;
1254 policy.m_nodes.resize(0);
1255 dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy, false);
1256 policy.m_nodes.quickSort(b3DbvtBenchmark::P14::sortfnc);
1258 const int time = (int)wallclock.getTimeMilliseconds();
1259 const int t = cfgBenchmark14_Iterations;
1260 printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark14_Reference) * 100 / time, (t * 1000) / time);
1262 if (cfgBenchmark15_Enable)
1266 b3AlignedObjectArray<b3Vector3> vectors;
1267 b3DbvtBenchmark::P15 policy;
1268 vectors.resize(cfgBenchmark15_Iterations);
1269 for (int i = 0; i < vectors.size(); ++i)
1271 vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
1273 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1274 dbvt.optimizeTopDown();
1275 policy.m_nodes.reserve(cfgLeaves);
1276 printf("[15] culling(KDOP+qsort): ");
1278 for (int i = 0; i < cfgBenchmark15_Iterations; ++i)
1280 static const b3Scalar offset = 0;
1281 policy.m_nodes.resize(0);
1282 policy.m_axis = vectors[i];
1283 dbvt.collideKDOP(dbvt.m_root, &vectors[i], &offset, 1, policy);
1284 policy.m_nodes.quickSort(b3DbvtBenchmark::P15::sortfnc);
1286 const int time = (int)wallclock.getTimeMilliseconds();
1287 const int t = cfgBenchmark15_Iterations;
1288 printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark15_Reference) * 100 / time, (t * 1000) / time);
1290 if (cfgBenchmark16_Enable)
1294 b3AlignedObjectArray<b3DbvtNode*> batch;
1295 b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1296 dbvt.optimizeTopDown();
1297 batch.reserve(cfgBenchmark16_BatchCount);
1298 printf("[16] insert/remove batch(%u): ", cfgBenchmark16_BatchCount);
1300 for (int i = 0; i < cfgBenchmark16_Passes; ++i)
1302 for (int j = 0; j < cfgBenchmark16_BatchCount; ++j)
1304 batch.push_back(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
1306 for (int j = 0; j < cfgBenchmark16_BatchCount; ++j)
1308 dbvt.remove(batch[j]);
1312 const int time = (int)wallclock.getTimeMilliseconds();
1313 const int ir = cfgBenchmark16_Passes * cfgBenchmark16_BatchCount;
1314 printf("%u ms (%i%%),(%u bir/s)\r\n", time, (time - cfgBenchmark16_Reference) * 100 / time, int(ir * 1000.0 / time));
1316 if (cfgBenchmark17_Enable)
1319 b3AlignedObjectArray<b3DbvtVolume> volumes;
1320 b3AlignedObjectArray<int> results;
1321 b3AlignedObjectArray<int> indices;
1322 volumes.resize(cfgLeaves);
1323 results.resize(cfgLeaves);
1324 indices.resize(cfgLeaves);
1325 for (int i = 0; i < cfgLeaves; ++i)
1328 volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
1330 for (int i = 0; i < cfgLeaves; ++i)
1332 b3Swap(indices[i], indices[rand() % cfgLeaves]);
1334 printf("[17] b3DbvtVolume select: ");
1336 for (int i = 0; i < cfgBenchmark17_Iterations; ++i)
1338 for (int j = 0; j < cfgLeaves; ++j)
1340 for (int k = 0; k < cfgLeaves; ++k)
1342 const int idx = indices[k];
1343 results[idx] = Select(volumes[idx], volumes[j], volumes[k]);
1347 const int time = (int)wallclock.getTimeMilliseconds();
1348 printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark17_Reference) * 100 / time);